Print this page
Hyperaggressive asserts pt 2/N
Be far more judicious in the use of curzone-using macros.
(Merge and extra asserts by danmcd.)
dss_paths[] entries need cleanup too
Try to remove assumption that zone's root vnode is marked VROOT
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/nfs/nfs4_srv.c
+++ new/usr/src/uts/common/fs/nfs/nfs4_srv.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25
26 26 /*
27 27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
28 28 * All Rights Reserved
29 29 */
30 30
31 31 /*
32 32 * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
33 33 * Copyright 2019 Nexenta Systems, Inc.
34 34 * Copyright 2019 Nexenta by DDN, Inc.
35 35 */
36 36
37 37 #include <sys/param.h>
38 38 #include <sys/types.h>
39 39 #include <sys/systm.h>
40 40 #include <sys/cred.h>
41 41 #include <sys/buf.h>
42 42 #include <sys/vfs.h>
43 43 #include <sys/vfs_opreg.h>
44 44 #include <sys/vnode.h>
45 45 #include <sys/uio.h>
46 46 #include <sys/errno.h>
47 47 #include <sys/sysmacros.h>
48 48 #include <sys/statvfs.h>
49 49 #include <sys/kmem.h>
50 50 #include <sys/dirent.h>
51 51 #include <sys/cmn_err.h>
52 52 #include <sys/debug.h>
53 53 #include <sys/systeminfo.h>
54 54 #include <sys/flock.h>
55 55 #include <sys/pathname.h>
56 56 #include <sys/nbmlock.h>
57 57 #include <sys/share.h>
58 58 #include <sys/atomic.h>
59 59 #include <sys/policy.h>
60 60 #include <sys/fem.h>
61 61 #include <sys/sdt.h>
62 62 #include <sys/ddi.h>
63 63 #include <sys/zone.h>
64 64
65 65 #include <fs/fs_reparse.h>
66 66
67 67 #include <rpc/types.h>
68 68 #include <rpc/auth.h>
69 69 #include <rpc/rpcsec_gss.h>
70 70 #include <rpc/svc.h>
71 71
72 72 #include <nfs/nfs.h>
73 73 #include <nfs/nfssys.h>
74 74 #include <nfs/export.h>
75 75 #include <nfs/nfs_cmd.h>
76 76 #include <nfs/lm.h>
77 77 #include <nfs/nfs4.h>
78 78 #include <nfs/nfs4_drc.h>
79 79
80 80 #include <sys/strsubr.h>
81 81 #include <sys/strsun.h>
82 82
83 83 #include <inet/common.h>
84 84 #include <inet/ip.h>
85 85 #include <inet/ip6.h>
86 86
87 87 #include <sys/tsol/label.h>
88 88 #include <sys/tsol/tndb.h>
89 89
90 90 #define RFS4_MAXLOCK_TRIES 4 /* Try to get the lock this many times */
91 91 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
92 92 #define RFS4_LOCK_DELAY 10 /* Milliseconds */
93 93 static clock_t rfs4_lock_delay = RFS4_LOCK_DELAY;
94 94 extern struct svc_ops rdma_svc_ops;
95 95 extern int nfs_loaned_buffers;
96 96 /* End of Tunables */
97 97
98 98 static int rdma_setup_read_data4(READ4args *, READ4res *);
99 99
100 100 /*
101 101 * Used to bump the stateid4.seqid value and show changes in the stateid
102 102 */
103 103 #define next_stateid(sp) (++(sp)->bits.chgseq)
104 104
105 105 /*
106 106 * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
107 107 * This is used to return NFS4ERR_TOOSMALL when clients specify
108 108 * maxcount that isn't large enough to hold the smallest possible
109 109 * XDR encoded dirent.
110 110 *
111 111 * sizeof cookie (8 bytes) +
112 112 * sizeof name_len (4 bytes) +
113 113 * sizeof smallest (padded) name (4 bytes) +
114 114 * sizeof bitmap4_len (12 bytes) + NOTE: we always encode len=2 bm4
115 115 * sizeof attrlist4_len (4 bytes) +
116 116 * sizeof next boolean (4 bytes)
117 117 *
118 118 * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
119 119 * the smallest possible entry4 (assumes no attrs requested).
120 120 * sizeof nfsstat4 (4 bytes) +
121 121 * sizeof verifier4 (8 bytes) +
122 122 * sizeof entry4list bool (4 bytes) +
123 123 * sizeof entry4 (36 bytes) +
124 124 * sizeof eof bool (4 bytes)
125 125 *
126 126 * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
127 127 * VOP_READDIR. Its value is the size of the maximum possible dirent
128 128 * for solaris. The DIRENT64_RECLEN macro returns the size of dirent
129 129 * required for a given name length. MAXNAMELEN is the maximum
130 130 * filename length allowed in Solaris. The first two DIRENT64_RECLEN()
131 131 * macros are to allow for . and .. entries -- just a minor tweak to try
132 132 * and guarantee that buffer we give to VOP_READDIR will be large enough
133 133 * to hold ., .., and the largest possible solaris dirent64.
134 134 */
135 135 #define RFS4_MINLEN_ENTRY4 36
136 136 #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
137 137 #define RFS4_MINLEN_RDDIR_BUF \
138 138 (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
139 139
140 140 /*
141 141 * It would be better to pad to 4 bytes since that's what XDR would do,
142 142 * but the dirents UFS gives us are already padded to 8, so just take
143 143 * what we're given. Dircount is only a hint anyway. Currently the
144 144 * solaris kernel is ASCII only, so there's no point in calling the
145 145 * UTF8 functions.
146 146 *
147 147 * dirent64: name padded to provide 8 byte struct alignment
148 148 * d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
149 149 *
150 150 * cookie: uint64_t + utf8namelen: uint_t + utf8name padded to 8 bytes
151 151 *
152 152 */
153 153 #define DIRENT64_TO_DIRCOUNT(dp) \
154 154 (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
155 155
156 156
157 157 static sysid_t lockt_sysid; /* dummy sysid for all LOCKT calls */
158 158
159 159 u_longlong_t nfs4_srv_caller_id;
160 160 uint_t nfs4_srv_vkey = 0;
161 161
162 162 void rfs4_init_compound_state(struct compound_state *);
163 163
164 164 static void nullfree(caddr_t);
165 165 static void rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
166 166 struct compound_state *);
167 167 static void rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
168 168 struct compound_state *);
169 169 static void rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
170 170 struct compound_state *);
171 171 static void rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
172 172 struct compound_state *);
173 173 static void rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
174 174 struct compound_state *);
175 175 static void rfs4_op_create_free(nfs_resop4 *resop);
176 176 static void rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
177 177 struct svc_req *, struct compound_state *);
178 178 static void rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
179 179 struct svc_req *, struct compound_state *);
180 180 static void rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
181 181 struct compound_state *);
182 182 static void rfs4_op_getattr_free(nfs_resop4 *);
183 183 static void rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
184 184 struct compound_state *);
185 185 static void rfs4_op_getfh_free(nfs_resop4 *);
186 186 static void rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
187 187 struct compound_state *);
188 188 static void rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
189 189 struct compound_state *);
190 190 static void rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
191 191 struct compound_state *);
192 192 static void lock_denied_free(nfs_resop4 *);
193 193 static void rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
194 194 struct compound_state *);
195 195 static void rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
196 196 struct compound_state *);
197 197 static void rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
198 198 struct compound_state *);
199 199 static void rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
200 200 struct compound_state *);
201 201 static void rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
202 202 struct svc_req *req, struct compound_state *cs);
203 203 static void rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
204 204 struct compound_state *);
205 205 static void rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
206 206 struct compound_state *);
207 207 static void rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
208 208 struct svc_req *, struct compound_state *);
209 209 static void rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
210 210 struct svc_req *, struct compound_state *);
211 211 static void rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
212 212 struct compound_state *);
213 213 static void rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
214 214 struct compound_state *);
215 215 static void rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
216 216 struct compound_state *);
217 217 static void rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
218 218 struct compound_state *);
219 219 static void rfs4_op_read_free(nfs_resop4 *);
220 220 static void rfs4_op_readdir_free(nfs_resop4 *resop);
221 221 static void rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
222 222 struct compound_state *);
223 223 static void rfs4_op_readlink_free(nfs_resop4 *);
224 224 static void rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
225 225 struct svc_req *, struct compound_state *);
226 226 static void rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
227 227 struct compound_state *);
228 228 static void rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
229 229 struct compound_state *);
230 230 static void rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
231 231 struct compound_state *);
232 232 static void rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
233 233 struct compound_state *);
234 234 static void rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
235 235 struct compound_state *);
236 236 static void rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
237 237 struct compound_state *);
238 238 static void rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
239 239 struct compound_state *);
240 240 static void rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
241 241 struct compound_state *);
242 242 static void rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
243 243 struct svc_req *, struct compound_state *);
244 244 static void rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
245 245 struct svc_req *req, struct compound_state *);
246 246 static void rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
247 247 struct compound_state *);
248 248 static void rfs4_op_secinfo_free(nfs_resop4 *);
249 249
250 250 static nfsstat4 check_open_access(uint32_t, struct compound_state *,
251 251 struct svc_req *);
252 252 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
253 253 void rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *);
254 254
255 255
256 256 /*
257 257 * translation table for attrs
258 258 */
259 259 struct nfs4_ntov_table {
260 260 union nfs4_attr_u *na;
261 261 uint8_t amap[NFS4_MAXNUM_ATTRS];
262 262 int attrcnt;
263 263 bool_t vfsstat;
264 264 };
265 265
266 266 static void nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
267 267 static void nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
268 268 struct nfs4_svgetit_arg *sargp);
269 269
270 270 static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
271 271 struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
272 272 struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
273 273
274 274 static void hanfsv4_failover(nfs4_srv_t *);
275 275
276 276 fem_t *deleg_rdops;
277 277 fem_t *deleg_wrops;
278 278
279 279 /*
280 280 * NFS4 op dispatch table
281 281 */
282 282
283 283 struct rfsv4disp {
284 284 void (*dis_proc)(); /* proc to call */
285 285 void (*dis_resfree)(); /* frees space allocated by proc */
286 286 int dis_flags; /* RPC_IDEMPOTENT, etc... */
287 287 };
288 288
289 289 static struct rfsv4disp rfsv4disptab[] = {
290 290 /*
291 291 * NFS VERSION 4
292 292 */
293 293
294 294 /* RFS_NULL = 0 */
295 295 {rfs4_op_illegal, nullfree, 0},
296 296
297 297 /* UNUSED = 1 */
298 298 {rfs4_op_illegal, nullfree, 0},
299 299
300 300 /* UNUSED = 2 */
301 301 {rfs4_op_illegal, nullfree, 0},
302 302
303 303 /* OP_ACCESS = 3 */
304 304 {rfs4_op_access, nullfree, RPC_IDEMPOTENT},
305 305
306 306 /* OP_CLOSE = 4 */
307 307 {rfs4_op_close, nullfree, 0},
308 308
309 309 /* OP_COMMIT = 5 */
310 310 {rfs4_op_commit, nullfree, RPC_IDEMPOTENT},
311 311
312 312 /* OP_CREATE = 6 */
313 313 {rfs4_op_create, nullfree, 0},
314 314
315 315 /* OP_DELEGPURGE = 7 */
316 316 {rfs4_op_delegpurge, nullfree, 0},
317 317
318 318 /* OP_DELEGRETURN = 8 */
319 319 {rfs4_op_delegreturn, nullfree, 0},
320 320
321 321 /* OP_GETATTR = 9 */
322 322 {rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},
323 323
324 324 /* OP_GETFH = 10 */
325 325 {rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},
326 326
327 327 /* OP_LINK = 11 */
328 328 {rfs4_op_link, nullfree, 0},
329 329
330 330 /* OP_LOCK = 12 */
331 331 {rfs4_op_lock, lock_denied_free, 0},
332 332
333 333 /* OP_LOCKT = 13 */
334 334 {rfs4_op_lockt, lock_denied_free, 0},
335 335
336 336 /* OP_LOCKU = 14 */
337 337 {rfs4_op_locku, nullfree, 0},
338 338
339 339 /* OP_LOOKUP = 15 */
340 340 {rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
341 341
342 342 /* OP_LOOKUPP = 16 */
343 343 {rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
344 344
345 345 /* OP_NVERIFY = 17 */
346 346 {rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},
347 347
348 348 /* OP_OPEN = 18 */
349 349 {rfs4_op_open, rfs4_free_reply, 0},
350 350
351 351 /* OP_OPENATTR = 19 */
352 352 {rfs4_op_openattr, nullfree, 0},
353 353
354 354 /* OP_OPEN_CONFIRM = 20 */
355 355 {rfs4_op_open_confirm, nullfree, 0},
356 356
357 357 /* OP_OPEN_DOWNGRADE = 21 */
358 358 {rfs4_op_open_downgrade, nullfree, 0},
359 359
360 360 /* OP_PUTFH = 22 */
361 361 {rfs4_op_putfh, nullfree, RPC_ALL},
362 362
363 363 /* OP_PUTPUBFH = 23 */
364 364 {rfs4_op_putpubfh, nullfree, RPC_ALL},
365 365
366 366 /* OP_PUTROOTFH = 24 */
367 367 {rfs4_op_putrootfh, nullfree, RPC_ALL},
368 368
369 369 /* OP_READ = 25 */
370 370 {rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},
371 371
372 372 /* OP_READDIR = 26 */
373 373 {rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},
374 374
375 375 /* OP_READLINK = 27 */
376 376 {rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},
377 377
378 378 /* OP_REMOVE = 28 */
379 379 {rfs4_op_remove, nullfree, 0},
380 380
381 381 /* OP_RENAME = 29 */
382 382 {rfs4_op_rename, nullfree, 0},
383 383
384 384 /* OP_RENEW = 30 */
385 385 {rfs4_op_renew, nullfree, 0},
386 386
387 387 /* OP_RESTOREFH = 31 */
388 388 {rfs4_op_restorefh, nullfree, RPC_ALL},
389 389
390 390 /* OP_SAVEFH = 32 */
391 391 {rfs4_op_savefh, nullfree, RPC_ALL},
392 392
393 393 /* OP_SECINFO = 33 */
394 394 {rfs4_op_secinfo, rfs4_op_secinfo_free, 0},
395 395
396 396 /* OP_SETATTR = 34 */
397 397 {rfs4_op_setattr, nullfree, 0},
398 398
399 399 /* OP_SETCLIENTID = 35 */
400 400 {rfs4_op_setclientid, nullfree, 0},
401 401
402 402 /* OP_SETCLIENTID_CONFIRM = 36 */
403 403 {rfs4_op_setclientid_confirm, nullfree, 0},
404 404
405 405 /* OP_VERIFY = 37 */
406 406 {rfs4_op_verify, nullfree, RPC_IDEMPOTENT},
407 407
408 408 /* OP_WRITE = 38 */
409 409 {rfs4_op_write, nullfree, 0},
410 410
411 411 /* OP_RELEASE_LOCKOWNER = 39 */
412 412 {rfs4_op_release_lockowner, nullfree, 0},
413 413 };
414 414
415 415 static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
416 416
417 417 #define OP_ILLEGAL_IDX (rfsv4disp_cnt)
418 418
419 419 #ifdef DEBUG
420 420
421 421 int rfs4_fillone_debug = 0;
422 422 int rfs4_no_stub_access = 1;
423 423 int rfs4_rddir_debug = 0;
424 424
425 425 static char *rfs4_op_string[] = {
426 426 "rfs4_op_null",
427 427 "rfs4_op_1 unused",
428 428 "rfs4_op_2 unused",
429 429 "rfs4_op_access",
430 430 "rfs4_op_close",
431 431 "rfs4_op_commit",
432 432 "rfs4_op_create",
433 433 "rfs4_op_delegpurge",
434 434 "rfs4_op_delegreturn",
435 435 "rfs4_op_getattr",
436 436 "rfs4_op_getfh",
437 437 "rfs4_op_link",
438 438 "rfs4_op_lock",
439 439 "rfs4_op_lockt",
440 440 "rfs4_op_locku",
441 441 "rfs4_op_lookup",
442 442 "rfs4_op_lookupp",
443 443 "rfs4_op_nverify",
444 444 "rfs4_op_open",
445 445 "rfs4_op_openattr",
446 446 "rfs4_op_open_confirm",
447 447 "rfs4_op_open_downgrade",
448 448 "rfs4_op_putfh",
449 449 "rfs4_op_putpubfh",
450 450 "rfs4_op_putrootfh",
451 451 "rfs4_op_read",
452 452 "rfs4_op_readdir",
453 453 "rfs4_op_readlink",
454 454 "rfs4_op_remove",
455 455 "rfs4_op_rename",
456 456 "rfs4_op_renew",
457 457 "rfs4_op_restorefh",
458 458 "rfs4_op_savefh",
459 459 "rfs4_op_secinfo",
460 460 "rfs4_op_setattr",
461 461 "rfs4_op_setclientid",
462 462 "rfs4_op_setclient_confirm",
463 463 "rfs4_op_verify",
464 464 "rfs4_op_write",
465 465 "rfs4_op_release_lockowner",
466 466 "rfs4_op_illegal"
467 467 };
468 468 #endif
469 469
470 470 void rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *);
471 471
472 472 extern size_t strlcpy(char *dst, const char *src, size_t dstsize);
473 473
474 474 extern void rfs4_free_fs_locations4(fs_locations4 *);
475 475
476 476 #ifdef nextdp
477 477 #undef nextdp
478 478 #endif
479 479 #define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
480 480
481 481 static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
482 482 VOPNAME_OPEN, { .femop_open = deleg_rd_open },
483 483 VOPNAME_WRITE, { .femop_write = deleg_rd_write },
484 484 VOPNAME_SETATTR, { .femop_setattr = deleg_rd_setattr },
485 485 VOPNAME_RWLOCK, { .femop_rwlock = deleg_rd_rwlock },
486 486 VOPNAME_SPACE, { .femop_space = deleg_rd_space },
487 487 VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_rd_setsecattr },
488 488 VOPNAME_VNEVENT, { .femop_vnevent = deleg_rd_vnevent },
489 489 NULL, NULL
490 490 };
491 491 static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
492 492 VOPNAME_OPEN, { .femop_open = deleg_wr_open },
493 493 VOPNAME_READ, { .femop_read = deleg_wr_read },
494 494 VOPNAME_WRITE, { .femop_write = deleg_wr_write },
495 495 VOPNAME_SETATTR, { .femop_setattr = deleg_wr_setattr },
496 496 VOPNAME_RWLOCK, { .femop_rwlock = deleg_wr_rwlock },
497 497 VOPNAME_SPACE, { .femop_space = deleg_wr_space },
498 498 VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_wr_setsecattr },
499 499 VOPNAME_VNEVENT, { .femop_vnevent = deleg_wr_vnevent },
500 500 NULL, NULL
501 501 };
502 502
/*
 * Return the nfs4_srv_t attached to the NFS server globals that are
 * registered for the current zone (curzone) under nfssrv_zone_key.
 *
 * The result of zone_getspecific() is dereferenced without a NULL check,
 * and the nfs4_srv pointer itself is only verified by ASSERT.
 * NOTE(review): presumably callers only run after rfs4_srv_zone_init()
 * has stored ng->nfs4_srv for this zone -- confirm.
 */
503 503 nfs4_srv_t *
504 504 nfs4_get_srv(void)
505 505 {
506 506 nfs_globals_t *ng = zone_getspecific(nfssrv_zone_key, curzone);
507 507 nfs4_srv_t *srv = ng->nfs4_srv;
508 508 ASSERT(srv != NULL);
509 509 return (srv);
510 510 }
511 511
/*
 * Allocate and initialize the NFSv4 server state and attach it to the
 * supplied NFS globals.
 *
 * ng: globals structure whose nfs4_srv field receives the new nfs4_srv_t.
 *
 * The structure comes from kmem_zalloc(KM_SLEEP).  The write verifier, the
 * server-state pointers, the delegation policy (SRV_NEVER_DELEGATE) and the
 * three mutexes plus the policy rwlock are set up before ng->nfs4_srv is
 * assigned on the last line.
 */
512 512 void
513 513 rfs4_srv_zone_init(nfs_globals_t *ng)
514 514 {
515 515 nfs4_srv_t *nsrv4;
516 516 timespec32_t verf;
517 517
518 518 nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP);
519 519
520 520 /*
521 521 * The following algorithm attempts to find a unique verifier
522 522 * to be used as the write verifier returned from the server
523 523 * to the client. It is important that this verifier change
524 524 * whenever the server reboots. Of secondary importance, it
525 525 * is important for the verifier to be unique between two
526 526 * different servers.
527 527 *
528 528 * Thus, an attempt is made to use the system hostid and the
529 529 * current time in seconds when the nfssrv kernel module is
530 530 * loaded. It is assumed that an NFS server will not be able
531 531 * to boot and then to reboot in less than a second. If the
532 532 * hostid has not been set, then the current high resolution
533 533 * time is used. This will ensure different verifiers each
534 534 * time the server reboots and minimize the chances that two
535 535 * different servers will have the same verifier.
536 536 * XXX - this is broken on LP64 kernels.
537 537 */
/* NOTE(review): zone_get_hostid(NULL) presumably yields the global hostid -- confirm. */
538 538 verf.tv_sec = (time_t)zone_get_hostid(NULL);
539 539 if (verf.tv_sec != 0) {
540 540 verf.tv_nsec = gethrestime_sec();
541 541 } else {
542 542 timespec_t tverf;
543 543
544 544 gethrestime(&tverf);
545 545 verf.tv_sec = (time_t)tverf.tv_sec;
546 546 verf.tv_nsec = tverf.tv_nsec;
547 547 }
/*
 * The verifier is the storage of verf read back through a uint64_t pointer.
 * NOTE(review): this assumes sizeof (timespec32_t) == sizeof (uint64_t) and
 * relies on a type-punning cast -- confirm both are acceptable here.
 */
548 548 nsrv4->write4verf = *(uint64_t *)&verf;
549 549
550 550 /* Used to manage create/destroy of server state */
551 551 nsrv4->nfs4_server_state = NULL;
552 552 nsrv4->nfs4_cur_servinst = NULL;
553 553 nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE;
554 554 mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL);
555 555 mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL);
556 556 mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL);
557 557 rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL);
558 558
559 559 ng->nfs4_srv = nsrv4;
560 560 }
561 561
/*
 * Release the NFSv4 server state that rfs4_srv_zone_init() attached to ng.
 *
 * ng: globals structure; its nfs4_srv pointer is cleared first, then the
 *     three mutexes and the delegation policy rwlock are destroyed and the
 *     nfs4_srv_t itself is freed.
 *
 * NOTE(review): nsrv4 is used without a NULL check, so this presumably must
 * not be called twice for the same ng -- confirm against callers.
 */
562 562 void
563 563 rfs4_srv_zone_fini(nfs_globals_t *ng)
564 564 {
565 565 nfs4_srv_t *nsrv4 = ng->nfs4_srv;
566 566
567 567 ng->nfs4_srv = NULL;
568 568
569 569 mutex_destroy(&nsrv4->deleg_lock);
570 570 mutex_destroy(&nsrv4->state_lock);
571 571 mutex_destroy(&nsrv4->servinst_lock);
572 572 rw_destroy(&nsrv4->deleg_policy_lock);
573 573
574 574 kmem_free(nsrv4, sizeof (*nsrv4));
575 575 }
576 576
/*
 * Global (non per-zone) NFSv4 server initialization.
 *
 * Calls rfs4_attr_init(), creates the "deleg_rdops" and "deleg_wrops" FEM
 * monitors from the read/write delegation templates, allocates the caller
 * id and the dummy LOCKT sysid, creates the nfs4_srv_vkey VSD key and runs
 * rfs4_state_g_init().
 *
 * If either fem_create() call fails, delegation is turned off through
 * rfs4_disable_delegation() and initialization continues.
 *
 * NOTE(review): when the first fem_create() fails, deleg_wrops is never
 * created; when the second fails, deleg_rdops is freed here but the global
 * pointer is not cleared.  rfs4_srvrfini() calls fem_free() on both globals
 * unconditionally -- confirm this cannot free a NULL or already-freed
 * pointer.
 */
577 577 void
578 578 rfs4_srvrinit(void)
579 579 {
580 580 extern void rfs4_attr_init();
581 581
582 582 rfs4_attr_init();
583 583
584 584 if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) {
585 585 rfs4_disable_delegation();
586 586 } else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
587 587 &deleg_wrops) != 0) {
588 588 rfs4_disable_delegation();
/* The read monitor was created successfully above; drop it again. */
589 589 fem_free(deleg_rdops);
590 590 }
591 591
592 592 nfs4_srv_caller_id = fs_new_caller_id();
593 593 lockt_sysid = lm_alloc_sysidt();
594 594 vsd_create(&nfs4_srv_vkey, NULL);
595 595 rfs4_state_g_init();
596 596 }
597 597
/*
 * Global NFSv4 server teardown, the counterpart of rfs4_srvrinit().
 *
 * Releases the dummy LOCKT sysid if one is held (and marks it LM_NOSYSID),
 * runs rfs4_state_g_fini(), then frees both delegation FEM monitors.
 *
 * NOTE(review): fem_free() is called on deleg_rdops and deleg_wrops without
 * checking whether rfs4_srvrinit() actually created them (or already freed
 * deleg_rdops on its error path) -- confirm fem_free() tolerates that or
 * that this path is unreachable after a fem_create() failure.
 */
598 598 void
599 599 rfs4_srvrfini(void)
600 600 {
601 601 if (lockt_sysid != LM_NOSYSID) {
602 602 lm_free_sysidt(lockt_sysid);
603 603 lockt_sysid = LM_NOSYSID;
604 604 }
605 605
606 606 rfs4_state_g_fini();
607 607
608 608 fem_free(deleg_rdops);
609 609 fem_free(deleg_wrops);
610 610 }
611 611
/*
 * Start NFSv4 service using the nfs4_srv_t returned by nfs4_get_srv().
 *
 * server_upordown: NFS_SERVER_QUIESCED selects the warm-start path, which
 *     keeps existing state; any other value takes the cold-start path.
 * srv_delegation: when not FALSE, the delegation policy is set to
 *     SRV_NORMAL_DELEGATE after the start path has run.
 * cluster_booted: on a warm start only, a non-zero value triggers
 *     hanfsv4_failover().
 *
 * The cold-start path resets rfs4_start_time, calls rfs4_state_zone_init(),
 * creates the DRC via rfs4_init_drc(), and reads stable-storage state from
 * rfs4_dss_newpaths only when running in the global zone with at least one
 * new path.
 */
612 612 void
613 613 rfs4_do_server_start(int server_upordown,
614 614 int srv_delegation, int cluster_booted)
615 615 {
616 616 nfs4_srv_t *nsrv4 = nfs4_get_srv();
617 617
618 618 /* Is this a warm start? */
619 619 if (server_upordown == NFS_SERVER_QUIESCED) {
620 620 cmn_err(CE_NOTE, "nfs4_srv: "
621 621 "server was previously quiesced; "
622 622 "existing NFSv4 state will be re-used");
623 623
624 624 /*
625 625 * HA-NFSv4: this is also the signal
626 626 * that a Resource Group failover has
627 627 * occurred.
628 628 */
629 629 if (cluster_booted)
630 630 hanfsv4_failover(nsrv4);
631 631 } else {
632 632 /* Cold start */
633 633 nsrv4->rfs4_start_time = 0;
634 634 rfs4_state_zone_init(nsrv4);
635 635 nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max,
636 636 nfs4_drc_hash);
637 637
638 638 /*
639 639 * The nfsd service was started with the -s option
640 640 * we need to pull in any state from the paths indicated.
641 641 */
642 642 if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
643 643 /* read in the stable storage state from these paths */
644 644 rfs4_dss_readstate(nsrv4, rfs4_dss_numnewpaths,
645 645 rfs4_dss_newpaths);
646 646 }
647 647 }
648 648
649 649 /* Check if delegation is to be enabled */
650 650 if (srv_delegation != FALSE)
651 651 rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE);
652 652 }
653 653
/*
 * Put a compound_state into its initial condition.
 *
 * cs: structure to initialize.  It is zeroed in full, after which cont is
 *     set to TRUE, access to CS_ACCESS_DENIED, and the filehandle value
 *     pointer (fh.nfs_fh4_val) is pointed at the structure's own fhbuf.
 */
654 654 void
655 655 rfs4_init_compound_state(struct compound_state *cs)
656 656 {
657 657 bzero(cs, sizeof (*cs));
658 658 cs->cont = TRUE;
659 659 cs->access = CS_ACCESS_DENIED;
/* deleg and mandlock are assigned explicitly even though bzero() ran above. */
660 660 cs->deleg = FALSE;
661 661 cs->mandlock = FALSE;
662 662 cs->fh.nfs_fh4_val = cs->fhbuf;
663 663 }
664 664
/*
 * Start (or restart) the grace period of one server instance.
 *
 * sip: instance to update.  With sip->rwlock held as writer, start_time is
 *      set to the current ddi_get_lbolt() value converted by TICK_TO_SEC()
 *      and grace_period is set to rfs4_grace_period.
 */
665 665 void
666 666 rfs4_grace_start(rfs4_servinst_t *sip)
667 667 {
668 668 rw_enter(&sip->rwlock, RW_WRITER);
669 669 sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
670 670 sip->grace_period = rfs4_grace_period;
671 671 rw_exit(&sip->rwlock);
672 672 }
673 673
674 674 /*
675 675 * returns true if the instance's grace period has never been started
676 676 */
/*
 * sip: server instance to inspect.
 *
 * Returns non-zero when sip->start_time is still 0, zero otherwise.  The
 * field is sampled with sip->rwlock held as reader; the comparison is done
 * on the local copy after the lock has been dropped.
 */
677 677 int
678 678 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
679 679 {
680 680 time_t start_time;
681 681
682 682 rw_enter(&sip->rwlock, RW_READER);
683 683 start_time = sip->start_time;
684 684 rw_exit(&sip->rwlock);
685 685
686 686 return (start_time == 0);
687 687 }
688 688
689 689 /*
690 690 * Indicates if server instance is within the
691 691 * grace period.
692 692 */
/*
 * sip: server instance to inspect.
 *
 * Returns non-zero while the current time (ddi_get_lbolt() converted by
 * TICK_TO_SEC()) is strictly less than start_time + grace_period.  The sum
 * is computed with sip->rwlock held as reader; the time comparison happens
 * after the lock is released.
 */
693 693 int
694 694 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
695 695 {
696 696 time_t grace_expiry;
697 697
698 698 rw_enter(&sip->rwlock, RW_READER);
699 699 grace_expiry = sip->start_time + sip->grace_period;
700 700 rw_exit(&sip->rwlock);
701 701
702 702 return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
703 703 }
704 704
/*
 * Report whether a client's server instance is within its grace period.
 *
 * cp: client whose rc_server_instance is checked; its rc_dbe must have a
 *     positive reference count (checked by ASSERT only).
 *
 * Returns the result of rfs4_servinst_in_grace() for that instance.
 */
705 705 int
706 706 rfs4_clnt_in_grace(rfs4_client_t *cp)
707 707 {
708 708 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
709 709
710 710 return (rfs4_servinst_in_grace(cp->rc_server_instance));
711 711 }
712 712
713 713 /*
714 714 * reset all currently active grace periods
715 715 */
/*
 * nsrv4: server state whose instance list is walked.
 *
 * With servinst_lock held, follows the prev links starting at
 * nfs4_cur_servinst and calls rfs4_grace_start() on every instance for
 * which rfs4_servinst_in_grace() is currently true.  Instances that are not
 * in grace are left untouched.
 */
716 716 void
717 717 rfs4_grace_reset_all(nfs4_srv_t *nsrv4)
718 718 {
719 719 rfs4_servinst_t *sip;
720 720
721 721 mutex_enter(&nsrv4->servinst_lock);
722 722 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
723 723 if (rfs4_servinst_in_grace(sip))
724 724 rfs4_grace_start(sip);
725 725 mutex_exit(&nsrv4->servinst_lock);
726 726 }
727 727
728 728 /*
729 729 * start any new instances' grace periods
730 730 */
/*
 * nsrv4: server state whose instance list is walked.
 *
 * With servinst_lock held, follows the prev links starting at
 * nfs4_cur_servinst and calls rfs4_grace_start() on every instance whose
 * start_time is still 0 (as reported by rfs4_servinst_grace_new()).
 */
731 731 void
732 732 rfs4_grace_start_new(nfs4_srv_t *nsrv4)
733 733 {
734 734 rfs4_servinst_t *sip;
735 735
736 736 mutex_enter(&nsrv4->servinst_lock);
737 737 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
738 738 if (rfs4_servinst_grace_new(sip))
739 739 rfs4_grace_start(sip);
740 740 mutex_exit(&nsrv4->servinst_lock);
741 741 }
742 742
/*
 * Create a rfs4_dss_path_t for one path and add it to nsrv4->dss_pathlist.
 *
 * nsrv4: server state owning the dss_pathlist.
 * sip:   server instance recorded in the new entry.
 * path:  NUL-terminated path string; a private copy is allocated.
 * index: value stored in the entry's index field.
 *
 * Returns the new entry.  Both allocations use KM_SLEEP.  The entry itself
 * comes from kmem_alloc() (not zeroed): path, sip and index are always set;
 * next/prev are set here only for the first entry, otherwise linking is
 * left to insque().
 */
743 743 static rfs4_dss_path_t *
744 744 rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip,
745 745 char *path, unsigned index)
746 746 {
747 747 size_t len;
748 748 rfs4_dss_path_t *dss_path;
749 749
750 750 dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
751 751
752 752 /*
753 753 * Take a copy of the string, since the original may be overwritten.
754 754 * Sadly, no strdup() in the kernel.
755 755 */
756 756 /* allow for NUL */
757 757 len = strlen(path) + 1;
758 758 dss_path->path = kmem_alloc(len, KM_SLEEP);
759 759 (void) strlcpy(dss_path->path, path, len);
760 760
761 761 /* associate with servinst */
762 762 dss_path->sip = sip;
763 763 dss_path->index = index;
764 764
765 765 /*
766 766 * Add to list of served paths.
767 767 * No locking required, as we're only ever called at startup.
768 768 */
769 769 if (nsrv4->dss_pathlist == NULL) {
770 770 /* this is the first dss_path_t */
771 771
772 772 /* needed for insque/remque */
773 773 dss_path->next = dss_path->prev = dss_path;
774 774
775 775 nsrv4->dss_pathlist = dss_path;
776 776 } else {
777 777 insque(dss_path, nsrv4->dss_pathlist);
778 778 }
779 779
780 780 return (dss_path);
781 781 }
782 782
783 783 /*
784 784 * Create a new server instance, and make it the currently active instance.
785 785 * Note that starting the grace period too early will reduce the clients'
786 786 * recovery window.
787 787 */
/*
 * nsrv4:       server state that receives the new instance.
 * start_grace: when non-zero, rfs4_grace_start() is called on the new
 *              instance before it becomes current.
 * dss_npaths:  number of entries in dss_paths.
 * dss_paths:   path strings; one rfs4_dss_path_t is created per entry via
 *              rfs4_dss_newpath().
 *
 * The instance comes from kmem_alloc() (not zeroed); start_time,
 * grace_period, next and prev are set explicitly, both rwlocks are
 * initialized, and a self-linked dummy oldstate entry is installed.
 * Linking into the instance list and the switch of nfs4_cur_servinst happen
 * with servinst_lock held.
 *
 * NOTE(review): the loop index is unsigned while dss_npaths is int, so the
 * loop comparison is done unsigned -- presumably dss_npaths is never
 * negative; confirm against callers.
 */
788 788 void
789 789 rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace,
790 790 int dss_npaths, char **dss_paths)
791 791 {
792 792 unsigned i;
793 793 rfs4_servinst_t *sip;
794 794 rfs4_oldstate_t *oldstate;
795 795
796 796 sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
797 797 rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
798 798
799 799 sip->start_time = (time_t)0;
800 800 sip->grace_period = (time_t)0;
801 801 sip->next = NULL;
802 802 sip->prev = NULL;
803 803
804 804 rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
805 805 /*
806 806 * This initial dummy entry is required to setup for insque/remque.
807 807 * It must be skipped over whenever the list is traversed.
808 808 */
809 809 oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
810 810 /* insque/remque require initial list entry to be self-terminated */
|
↓ open down ↓ |
810 lines elided |
↑ open up ↑ |
811 811 oldstate->next = oldstate;
812 812 oldstate->prev = oldstate;
813 813 sip->oldstate = oldstate;
814 814
815 815
816 816 sip->dss_npaths = dss_npaths;
817 817 sip->dss_paths = kmem_alloc(dss_npaths *
818 818 sizeof (rfs4_dss_path_t *), KM_SLEEP);
819 819
820 820 for (i = 0; i < dss_npaths; i++) {
821 - /* CSTYLED */
822 - sip->dss_paths[i] = rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
821 + sip->dss_paths[i] =
822 + rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
823 823 }
824 824
825 825 mutex_enter(&nsrv4->servinst_lock);
826 826 if (nsrv4->nfs4_cur_servinst != NULL) {
827 827 /* add to linked list */
828 828 sip->prev = nsrv4->nfs4_cur_servinst;
829 829 nsrv4->nfs4_cur_servinst->next = sip;
830 830 }
831 831 if (start_grace)
832 832 rfs4_grace_start(sip);
833 833 /* make the new instance "current" */
834 834 nsrv4->nfs4_cur_servinst = sip;
835 835
836 836 mutex_exit(&nsrv4->servinst_lock);
837 837 }
838 838
839 839 /*
840 840 * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
841 841 * all instances directly.
842 842 */
/*
 * nsrv4: server state whose instances are destroyed.
 *
 * With servinst_lock held: asserts that a current instance exists, clears
 * nfs4_cur_servinst, then walks the prev links.  For each instance the
 * rwlock is destroyed, the oldstate entry pointed to by sip->oldstate is
 * freed, every non-NULL dss_paths[] entry has its path string and structure
 * freed (walking from the last index down), the dss_paths array is freed,
 * and finally the instance itself is freed.
 *
 * NOTE(review): sip->oldstate_lock is initialized by rfs4_servinst_create()
 * but is not rw_destroy()ed here -- confirm whether that is intentional.
 * NOTE(review): the rfs4_dss_path_t entries freed here were linked onto
 * nsrv4->dss_pathlist by rfs4_dss_newpath(); this function does not unlink
 * them or reset dss_pathlist -- confirm that is handled elsewhere.
 * NOTE(review): only the single entry at sip->oldstate is freed; presumably
 * any other list members are released before this runs -- confirm.
 * The DEBUG-only counter n is incremented per instance but not otherwise
 * used in this function.
 */
843 843 void
844 844 rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4)
845 845 {
846 846 rfs4_servinst_t *sip, *prev, *current;
847 847 #ifdef DEBUG
848 848 int n = 0;
849 849 #endif
|
↓ open down ↓ |
17 lines elided |
↑ open up ↑ |
850 850
851 851 mutex_enter(&nsrv4->servinst_lock);
852 852 ASSERT(nsrv4->nfs4_cur_servinst != NULL);
853 853 current = nsrv4->nfs4_cur_servinst;
854 854 nsrv4->nfs4_cur_servinst = NULL;
855 855 for (sip = current; sip != NULL; sip = prev) {
856 856 prev = sip->prev;
857 857 rw_destroy(&sip->rwlock);
858 858 if (sip->oldstate)
859 859 kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
860 - if (sip->dss_paths)
860 + if (sip->dss_paths) {
861 + int i = sip->dss_npaths;
862 +
863 + while (i > 0) {
864 + i--;
865 + if (sip->dss_paths[i] != NULL) {
866 + char *path = sip->dss_paths[i]->path;
867 +
868 + if (path != NULL) {
869 + kmem_free(path,
870 + strlen(path) + 1);
871 + }
872 + kmem_free(sip->dss_paths[i],
873 + sizeof (rfs4_dss_path_t));
874 + }
875 + }
861 876 kmem_free(sip->dss_paths,
862 877 sip->dss_npaths * sizeof (rfs4_dss_path_t *));
878 + }
863 879 kmem_free(sip, sizeof (rfs4_servinst_t));
864 880 #ifdef DEBUG
865 881 n++;
866 882 #endif
867 883 }
868 884 mutex_exit(&nsrv4->servinst_lock);
869 885 }
870 886
871 887 /*
872 888 * Assign the current server instance to a client_t.
873 889 * Should be called with cp->rc_dbe held.
874 890 */
/*
 * nsrv4: server state whose servinst_lock protects the assignment.
 * cp:    client to update; its rc_dbe must have a positive reference count
 *        (checked by ASSERT only).
 * sip:   instance stored into cp->rc_server_instance.
 *
 * Note that the instance stored is the sip argument supplied by the caller;
 * this function does not read nsrv4->nfs4_cur_servinst itself.
 */
875 891 void
876 892 rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp,
877 893 rfs4_servinst_t *sip)
878 894 {
879 895 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
880 896
881 897 /*
882 898 * The lock ensures that if the current instance is in the process
883 899 * of changing, we will see the new one.
884 900 */
885 901 mutex_enter(&nsrv4->servinst_lock);
886 902 cp->rc_server_instance = sip;
887 903 mutex_exit(&nsrv4->servinst_lock);
888 904 }
889 905
/*
 * Return the server instance recorded in a client.
 *
 * cp: client to query; its rc_dbe must have a positive reference count
 *     (checked by ASSERT only).
 *
 * Returns cp->rc_server_instance.  Unlike rfs4_servinst_assign(), the field
 * is read without taking servinst_lock.
 */
890 906 rfs4_servinst_t *
891 907 rfs4_servinst(rfs4_client_t *cp)
892 908 {
893 909 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
894 910
895 911 return (cp->rc_server_instance);
896 912 }
897 913
898 914 /* ARGSUSED */
899 915 static void
900 916 nullfree(caddr_t resop)
901 917 {
902 918 }
903 919
904 920 /*
905 921 * This is a fall-through for invalid or not implemented (yet) ops
906 922 */
907 923 /* ARGSUSED */
908 924 static void
909 925 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
910 926 struct compound_state *cs)
911 927 {
912 928 *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
913 929 }
914 930
915 931 /*
916 932 * Check if the security flavor, nfsnum, is in the flavor_list.
917 933 */
918 934 bool_t
919 935 in_flavor_list(int nfsnum, int *flavor_list, int count)
920 936 {
921 937 int i;
922 938
923 939 for (i = 0; i < count; i++) {
924 940 if (nfsnum == flavor_list[i])
925 941 return (TRUE);
926 942 }
927 943 return (FALSE);
928 944 }
929 945
|
↓ open down ↓ |
57 lines elided |
↑ open up ↑ |
930 946 /*
931 947 * Used by rfs4_op_secinfo to get the security information from the
932 948 * export structure associated with the component.
933 949 */
934 950 /* ARGSUSED */
935 951 static nfsstat4
936 952 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
937 953 {
938 954 int error, different_export = 0;
939 955 vnode_t *dvp, *vp;
940 - struct exportinfo *exi = NULL;
956 + struct exportinfo *exi;
941 957 fid_t fid;
942 958 uint_t count, i;
943 959 secinfo4 *resok_val;
944 960 struct secinfo *secp;
945 961 seconfig_t *si;
946 962 bool_t did_traverse = FALSE;
947 963 int dotdot, walk;
948 964 nfs_export_t *ne = nfs_get_export();
949 965
950 966 dvp = cs->vp;
967 + exi = cs->exi;
968 + ASSERT(exi != NULL);
951 969 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
952 970
953 971 /*
954 972 * If dotdotting, then need to check whether it's above the
955 973 * root of a filesystem, or above an export point.
956 974 */
957 975 if (dotdot) {
958 -
976 + ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id);
959 977 /*
960 978 * If dotdotting at the root of a filesystem, then
961 979 * need to traverse back to the mounted-on filesystem
962 980 * and do the dotdot lookup there.
963 981 */
964 - if (cs->vp->v_flag & VROOT) {
982 + if ((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp)) {
965 983
966 984 /*
967 985 * If at the system root, then can
968 986 * go up no further.
969 987 */
970 988 if (VN_CMP(dvp, ZONE_ROOTVP()))
971 989 return (puterrno4(ENOENT));
972 990
973 991 /*
974 992 * Traverse back to the mounted-on filesystem
975 993 */
976 - dvp = untraverse(cs->vp);
994 + dvp = untraverse(dvp);
977 995
978 996 /*
979 997 * Set the different_export flag so we remember
980 998 * to pick up a new exportinfo entry for
981 999 * this new filesystem.
982 1000 */
983 1001 different_export = 1;
984 1002 } else {
985 1003
986 1004 /*
987 1005 * If dotdotting above an export point then set
988 1006 * the different_export to get new export info.
989 1007 */
990 - different_export = nfs_exported(cs->exi, cs->vp);
1008 + different_export = nfs_exported(exi, dvp);
991 1009 }
992 1010 }
993 1011
994 1012 /*
995 1013 * Get the vnode for the component "nm".
996 1014 */
997 1015 error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
998 1016 NULL, NULL, NULL);
999 1017 if (error)
1000 1018 return (puterrno4(error));
1001 1019
1002 1020 /*
1003 1021 * If the vnode is in a pseudo filesystem, or if the security flavor
1004 1022 * used in the request is valid but not an explicitly shared flavor,
1005 1023 * or the access bit indicates that this is a limited access,
1006 1024 * check whether this vnode is visible.
1007 1025 */
1008 1026 if (!different_export &&
1009 - (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
1027 + (PSEUDO(exi) || !is_exported_sec(cs->nfsflavor, exi) ||
1010 1028 cs->access & CS_ACCESS_LIMITED)) {
1011 - if (! nfs_visible(cs->exi, vp, &different_export)) {
1029 + if (! nfs_visible(exi, vp, &different_export)) {
1012 1030 VN_RELE(vp);
1013 1031 return (puterrno4(ENOENT));
1014 1032 }
1015 1033 }
1016 1034
1017 1035 /*
1018 1036 * If it's a mountpoint, then traverse it.
1019 1037 */
1020 1038 if (vn_ismntpt(vp)) {
1021 1039 if ((error = traverse(&vp)) != 0) {
1022 1040 VN_RELE(vp);
1023 1041 return (puterrno4(error));
1024 1042 }
1025 1043 /* remember that we had to traverse mountpoint */
1026 1044 did_traverse = TRUE;
1027 1045 different_export = 1;
1028 1046 } else if (vp->v_vfsp != dvp->v_vfsp) {
1029 1047 /*
1030 1048 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
1031 1049 * then vp is probably an LOFS object. We don't need the
1032 1050 * realvp, we just need to know that we might have crossed
1033 1051 * a server fs boundary and need to call checkexport4.
1034 1052 * (LOFS lookup hides server fs mountpoints, and actually calls
1035 1053 * traverse)
1036 1054 */
1037 1055 different_export = 1;
1038 1056 }
1039 1057
1040 1058 /*
1041 1059 * Get the export information for it.
1042 1060 */
|
↓ open down ↓ |
21 lines elided |
↑ open up ↑ |
1043 1061 if (different_export) {
1044 1062
1045 1063 bzero(&fid, sizeof (fid));
1046 1064 fid.fid_len = MAXFIDSZ;
1047 1065 error = vop_fid_pseudo(vp, &fid);
1048 1066 if (error) {
1049 1067 VN_RELE(vp);
1050 1068 return (puterrno4(error));
1051 1069 }
1052 1070
1071 + /* We'll need to reassign "exi". */
1053 1072 if (dotdot)
1054 1073 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
1055 1074 else
1056 1075 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
1057 1076
1058 1077 if (exi == NULL) {
1059 1078 if (did_traverse == TRUE) {
1060 1079 /*
1061 1080 * If this vnode is a mounted-on vnode,
1062 1081 * but the mounted-on file system is not
1063 1082 * exported, send back the secinfo for
1064 1083 * the exported node that the mounted-on
1065 1084 * vnode lives in.
1066 1085 */
1067 1086 exi = cs->exi;
1068 1087 } else {
1069 1088 VN_RELE(vp);
1070 1089 return (puterrno4(EACCES));
1071 1090 }
1072 1091 }
1073 - } else {
1074 - exi = cs->exi;
1075 1092 }
1076 1093 ASSERT(exi != NULL);
1077 1094
1078 1095
1079 1096 /*
1080 1097 * Create the secinfo result based on the security information
1081 1098 * from the exportinfo structure (exi).
1082 1099 *
1083 1100 * Return all flavors for a pseudo node.
1084 1101 * For a real export node, return the flavor that the client
1085 1102 * has access with.
1086 1103 */
1087 1104 ASSERT(RW_LOCK_HELD(&ne->exported_lock));
1088 1105 if (PSEUDO(exi)) {
1089 1106 count = exi->exi_export.ex_seccnt; /* total sec count */
1090 1107 resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1091 1108 secp = exi->exi_export.ex_secinfo;
1092 1109
1093 1110 for (i = 0; i < count; i++) {
1094 1111 si = &secp[i].s_secinfo;
1095 1112 resok_val[i].flavor = si->sc_rpcnum;
1096 1113 if (resok_val[i].flavor == RPCSEC_GSS) {
1097 1114 rpcsec_gss_info *info;
1098 1115
1099 1116 info = &resok_val[i].flavor_info;
1100 1117 info->qop = si->sc_qop;
1101 1118 info->service = (rpc_gss_svc_t)si->sc_service;
1102 1119
1103 1120 /* get oid opaque data */
1104 1121 info->oid.sec_oid4_len =
1105 1122 si->sc_gss_mech_type->length;
1106 1123 info->oid.sec_oid4_val = kmem_alloc(
1107 1124 si->sc_gss_mech_type->length, KM_SLEEP);
1108 1125 bcopy(
1109 1126 si->sc_gss_mech_type->elements,
1110 1127 info->oid.sec_oid4_val,
1111 1128 info->oid.sec_oid4_len);
1112 1129 }
1113 1130 }
1114 1131 resp->SECINFO4resok_len = count;
1115 1132 resp->SECINFO4resok_val = resok_val;
1116 1133 } else {
1117 1134 int ret_cnt = 0, k = 0;
1118 1135 int *flavor_list;
1119 1136
1120 1137 count = exi->exi_export.ex_seccnt; /* total sec count */
1121 1138 secp = exi->exi_export.ex_secinfo;
1122 1139
1123 1140 flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1124 1141 /* find out which flavors to return */
1125 1142 for (i = 0; i < count; i ++) {
1126 1143 int access, flavor, perm;
1127 1144
1128 1145 flavor = secp[i].s_secinfo.sc_nfsnum;
1129 1146 perm = secp[i].s_flags;
1130 1147
1131 1148 access = nfsauth4_secinfo_access(exi, cs->req,
1132 1149 flavor, perm, cs->basecr);
1133 1150
1134 1151 if (! (access & NFSAUTH_DENIED) &&
1135 1152 ! (access & NFSAUTH_WRONGSEC)) {
1136 1153 flavor_list[ret_cnt] = flavor;
1137 1154 ret_cnt++;
1138 1155 }
1139 1156 }
1140 1157
1141 1158 /* Create the returning SECINFO value */
1142 1159 resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1143 1160
1144 1161 for (i = 0; i < count; i++) {
1145 1162 /*
1146 1163 * If the flavor is in the flavor list,
1147 1164 * fill in resok_val.
1148 1165 */
1149 1166 si = &secp[i].s_secinfo;
1150 1167 if (in_flavor_list(si->sc_nfsnum,
1151 1168 flavor_list, ret_cnt)) {
1152 1169 resok_val[k].flavor = si->sc_rpcnum;
1153 1170 if (resok_val[k].flavor == RPCSEC_GSS) {
1154 1171 rpcsec_gss_info *info;
1155 1172
1156 1173 info = &resok_val[k].flavor_info;
1157 1174 info->qop = si->sc_qop;
1158 1175 info->service = (rpc_gss_svc_t)
1159 1176 si->sc_service;
1160 1177
1161 1178 /* get oid opaque data */
1162 1179 info->oid.sec_oid4_len =
1163 1180 si->sc_gss_mech_type->length;
1164 1181 info->oid.sec_oid4_val = kmem_alloc(
1165 1182 si->sc_gss_mech_type->length,
1166 1183 KM_SLEEP);
1167 1184 bcopy(si->sc_gss_mech_type->elements,
1168 1185 info->oid.sec_oid4_val,
1169 1186 info->oid.sec_oid4_len);
1170 1187 }
1171 1188 k++;
1172 1189 }
1173 1190 if (k >= ret_cnt)
1174 1191 break;
1175 1192 }
1176 1193 resp->SECINFO4resok_len = ret_cnt;
1177 1194 resp->SECINFO4resok_val = resok_val;
1178 1195 kmem_free(flavor_list, count * sizeof (int));
1179 1196 }
1180 1197
1181 1198 VN_RELE(vp);
1182 1199 return (NFS4_OK);
1183 1200 }
1184 1201
1185 1202 /*
1186 1203 * SECINFO (Operation 33): Obtain required security information on
1187 1204 * the component name in the format of (security-mechanism-oid, qop, service)
1188 1205 * triplets.
1189 1206 */
1190 1207 /* ARGSUSED */
1191 1208 static void
1192 1209 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1193 1210 struct compound_state *cs)
1194 1211 {
1195 1212 SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1196 1213 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1197 1214 utf8string *utfnm = &args->name;
1198 1215 uint_t len;
1199 1216 char *nm;
1200 1217 struct sockaddr *ca;
1201 1218 char *name = NULL;
1202 1219 nfsstat4 status = NFS4_OK;
1203 1220
1204 1221 DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1205 1222 SECINFO4args *, args);
1206 1223
1207 1224 /*
1208 1225 * Current file handle (cfh) should have been set before getting
1209 1226 * into this function. If not, return error.
1210 1227 */
1211 1228 if (cs->vp == NULL) {
1212 1229 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1213 1230 goto out;
1214 1231 }
1215 1232
1216 1233 if (cs->vp->v_type != VDIR) {
1217 1234 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1218 1235 goto out;
1219 1236 }
1220 1237
1221 1238 /*
1222 1239 * Verify the component name. If failed, error out, but
1223 1240 * do not error out if the component name is a "..".
1224 1241 * SECINFO will return its parent's secinfo data for SECINFO "..".
1225 1242 */
1226 1243 status = utf8_dir_verify(utfnm);
1227 1244 if (status != NFS4_OK) {
1228 1245 if (utfnm->utf8string_len != 2 ||
1229 1246 utfnm->utf8string_val[0] != '.' ||
1230 1247 utfnm->utf8string_val[1] != '.') {
1231 1248 *cs->statusp = resp->status = status;
1232 1249 goto out;
1233 1250 }
1234 1251 }
1235 1252
1236 1253 nm = utf8_to_str(utfnm, &len, NULL);
1237 1254 if (nm == NULL) {
1238 1255 *cs->statusp = resp->status = NFS4ERR_INVAL;
1239 1256 goto out;
1240 1257 }
1241 1258
1242 1259 if (len > MAXNAMELEN) {
1243 1260 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1244 1261 kmem_free(nm, len);
1245 1262 goto out;
1246 1263 }
1247 1264
1248 1265 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1249 1266 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1250 1267 MAXPATHLEN + 1);
1251 1268
1252 1269 if (name == NULL) {
1253 1270 *cs->statusp = resp->status = NFS4ERR_INVAL;
1254 1271 kmem_free(nm, len);
1255 1272 goto out;
1256 1273 }
1257 1274
1258 1275
1259 1276 *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1260 1277
1261 1278 if (name != nm)
1262 1279 kmem_free(name, MAXPATHLEN + 1);
1263 1280 kmem_free(nm, len);
1264 1281
1265 1282 out:
1266 1283 DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1267 1284 SECINFO4res *, resp);
1268 1285 }
1269 1286
1270 1287 /*
1271 1288 * Free SECINFO result.
1272 1289 */
1273 1290 /* ARGSUSED */
1274 1291 static void
1275 1292 rfs4_op_secinfo_free(nfs_resop4 *resop)
1276 1293 {
1277 1294 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1278 1295 int count, i;
1279 1296 secinfo4 *resok_val;
1280 1297
1281 1298 /* If this is not an Ok result, nothing to free. */
1282 1299 if (resp->status != NFS4_OK) {
1283 1300 return;
1284 1301 }
1285 1302
1286 1303 count = resp->SECINFO4resok_len;
1287 1304 resok_val = resp->SECINFO4resok_val;
1288 1305
1289 1306 for (i = 0; i < count; i++) {
1290 1307 if (resok_val[i].flavor == RPCSEC_GSS) {
1291 1308 rpcsec_gss_info *info;
1292 1309
1293 1310 info = &resok_val[i].flavor_info;
1294 1311 kmem_free(info->oid.sec_oid4_val,
1295 1312 info->oid.sec_oid4_len);
1296 1313 }
1297 1314 }
1298 1315 kmem_free(resok_val, count * sizeof (secinfo4));
1299 1316 resp->SECINFO4resok_len = 0;
1300 1317 resp->SECINFO4resok_val = NULL;
1301 1318 }
1302 1319
1303 1320 /* ARGSUSED */
1304 1321 static void
1305 1322 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1306 1323 struct compound_state *cs)
1307 1324 {
1308 1325 ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1309 1326 ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1310 1327 int error;
1311 1328 vnode_t *vp;
1312 1329 struct vattr va;
1313 1330 int checkwriteperm;
1314 1331 cred_t *cr = cs->cr;
1315 1332 bslabel_t *clabel, *slabel;
1316 1333 ts_label_t *tslabel;
1317 1334 boolean_t admin_low_client;
1318 1335
1319 1336 DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1320 1337 ACCESS4args *, args);
1321 1338
1322 1339 #if 0 /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1323 1340 if (cs->access == CS_ACCESS_DENIED) {
1324 1341 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1325 1342 goto out;
1326 1343 }
1327 1344 #endif
1328 1345 if (cs->vp == NULL) {
1329 1346 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1330 1347 goto out;
1331 1348 }
1332 1349
1333 1350 ASSERT(cr != NULL);
1334 1351
1335 1352 vp = cs->vp;
1336 1353
1337 1354 /*
1338 1355 * If the file system is exported read only, it is not appropriate
1339 1356 * to check write permissions for regular files and directories.
1340 1357 * Special files are interpreted by the client, so the underlying
1341 1358 * permissions are sent back to the client for interpretation.
1342 1359 */
1343 1360 if (rdonly4(req, cs) &&
1344 1361 (vp->v_type == VREG || vp->v_type == VDIR))
1345 1362 checkwriteperm = 0;
1346 1363 else
1347 1364 checkwriteperm = 1;
1348 1365
1349 1366 /*
1350 1367 * XXX
1351 1368 * We need the mode so that we can correctly determine access
1352 1369 * permissions relative to a mandatory lock file. Access to
1353 1370 * mandatory lock files is denied on the server, so it might
1354 1371 * as well be reflected to the client during the open.
1355 1372 */
1356 1373 va.va_mask = AT_MODE;
1357 1374 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1358 1375 if (error) {
1359 1376 *cs->statusp = resp->status = puterrno4(error);
1360 1377 goto out;
1361 1378 }
1362 1379 resp->access = 0;
1363 1380 resp->supported = 0;
1364 1381
1365 1382 if (is_system_labeled()) {
1366 1383 ASSERT(req->rq_label != NULL);
1367 1384 clabel = req->rq_label;
1368 1385 DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1369 1386 "got client label from request(1)",
1370 1387 struct svc_req *, req);
1371 1388 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1372 1389 if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1373 1390 *cs->statusp = resp->status = puterrno4(EACCES);
1374 1391 goto out;
1375 1392 }
1376 1393 slabel = label2bslabel(tslabel);
1377 1394 DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1378 1395 char *, "got server label(1) for vp(2)",
1379 1396 bslabel_t *, slabel, vnode_t *, vp);
1380 1397
1381 1398 admin_low_client = B_FALSE;
1382 1399 } else
1383 1400 admin_low_client = B_TRUE;
1384 1401 }
1385 1402
1386 1403 if (args->access & ACCESS4_READ) {
1387 1404 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1388 1405 if (!error && !MANDLOCK(vp, va.va_mode) &&
1389 1406 (!is_system_labeled() || admin_low_client ||
1390 1407 bldominates(clabel, slabel)))
1391 1408 resp->access |= ACCESS4_READ;
1392 1409 resp->supported |= ACCESS4_READ;
1393 1410 }
1394 1411 if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1395 1412 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1396 1413 if (!error && (!is_system_labeled() || admin_low_client ||
1397 1414 bldominates(clabel, slabel)))
1398 1415 resp->access |= ACCESS4_LOOKUP;
1399 1416 resp->supported |= ACCESS4_LOOKUP;
1400 1417 }
1401 1418 if (checkwriteperm &&
1402 1419 (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1403 1420 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1404 1421 if (!error && !MANDLOCK(vp, va.va_mode) &&
1405 1422 (!is_system_labeled() || admin_low_client ||
1406 1423 blequal(clabel, slabel)))
1407 1424 resp->access |=
1408 1425 (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1409 1426 resp->supported |=
1410 1427 resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1411 1428 }
1412 1429
1413 1430 if (checkwriteperm &&
1414 1431 (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1415 1432 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1416 1433 if (!error && (!is_system_labeled() || admin_low_client ||
1417 1434 blequal(clabel, slabel)))
1418 1435 resp->access |= ACCESS4_DELETE;
1419 1436 resp->supported |= ACCESS4_DELETE;
1420 1437 }
1421 1438 if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1422 1439 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1423 1440 if (!error && !MANDLOCK(vp, va.va_mode) &&
1424 1441 (!is_system_labeled() || admin_low_client ||
1425 1442 bldominates(clabel, slabel)))
1426 1443 resp->access |= ACCESS4_EXECUTE;
1427 1444 resp->supported |= ACCESS4_EXECUTE;
1428 1445 }
1429 1446
1430 1447 if (is_system_labeled() && !admin_low_client)
1431 1448 label_rele(tslabel);
1432 1449
1433 1450 *cs->statusp = resp->status = NFS4_OK;
1434 1451 out:
1435 1452 DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1436 1453 ACCESS4res *, resp);
1437 1454 }
1438 1455
1439 1456 /* ARGSUSED */
1440 1457 static void
1441 1458 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1442 1459 struct compound_state *cs)
1443 1460 {
1444 1461 COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1445 1462 COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1446 1463 int error;
1447 1464 vnode_t *vp = cs->vp;
1448 1465 cred_t *cr = cs->cr;
1449 1466 vattr_t va;
1450 1467 nfs4_srv_t *nsrv4;
1451 1468
1452 1469 DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1453 1470 COMMIT4args *, args);
1454 1471
1455 1472 if (vp == NULL) {
1456 1473 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1457 1474 goto out;
1458 1475 }
1459 1476 if (cs->access == CS_ACCESS_DENIED) {
1460 1477 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1461 1478 goto out;
1462 1479 }
1463 1480
1464 1481 if (args->offset + args->count < args->offset) {
1465 1482 *cs->statusp = resp->status = NFS4ERR_INVAL;
1466 1483 goto out;
1467 1484 }
1468 1485
1469 1486 va.va_mask = AT_UID;
1470 1487 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1471 1488
1472 1489 /*
1473 1490 * If we can't get the attributes, then we can't do the
1474 1491 * right access checking. So, we'll fail the request.
1475 1492 */
1476 1493 if (error) {
1477 1494 *cs->statusp = resp->status = puterrno4(error);
1478 1495 goto out;
1479 1496 }
1480 1497 if (rdonly4(req, cs)) {
1481 1498 *cs->statusp = resp->status = NFS4ERR_ROFS;
1482 1499 goto out;
1483 1500 }
1484 1501
1485 1502 if (vp->v_type != VREG) {
1486 1503 if (vp->v_type == VDIR)
1487 1504 resp->status = NFS4ERR_ISDIR;
1488 1505 else
1489 1506 resp->status = NFS4ERR_INVAL;
1490 1507 *cs->statusp = resp->status;
1491 1508 goto out;
1492 1509 }
1493 1510
1494 1511 if (crgetuid(cr) != va.va_uid &&
1495 1512 (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1496 1513 *cs->statusp = resp->status = puterrno4(error);
1497 1514 goto out;
1498 1515 }
1499 1516
1500 1517 error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1501 1518
1502 1519 if (error) {
1503 1520 *cs->statusp = resp->status = puterrno4(error);
1504 1521 goto out;
1505 1522 }
1506 1523
1507 1524 nsrv4 = nfs4_get_srv();
1508 1525 *cs->statusp = resp->status = NFS4_OK;
1509 1526 resp->writeverf = nsrv4->write4verf;
1510 1527 out:
1511 1528 DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1512 1529 COMMIT4res *, resp);
1513 1530 }
1514 1531
1515 1532 /*
1516 1533 * do_rfs4_op_mknod is called from rfs4_op_create after all initial
1517 1534 * verification was completed. It does the nfsv4 create for special files.
1518 1535 */
1519 1536 /* ARGSUSED */
1520 1537 static vnode_t *
1521 1538 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1522 1539 struct compound_state *cs, vattr_t *vap, char *nm)
1523 1540 {
1524 1541 int error;
1525 1542 cred_t *cr = cs->cr;
1526 1543 vnode_t *dvp = cs->vp;
1527 1544 vnode_t *vp = NULL;
1528 1545 int mode;
1529 1546 enum vcexcl excl;
1530 1547
1531 1548 switch (args->type) {
1532 1549 case NF4CHR:
1533 1550 case NF4BLK:
1534 1551 if (secpolicy_sys_devices(cr) != 0) {
1535 1552 *cs->statusp = resp->status = NFS4ERR_PERM;
1536 1553 return (NULL);
1537 1554 }
1538 1555 if (args->type == NF4CHR)
1539 1556 vap->va_type = VCHR;
1540 1557 else
1541 1558 vap->va_type = VBLK;
1542 1559 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1543 1560 args->ftype4_u.devdata.specdata2);
1544 1561 vap->va_mask |= AT_RDEV;
1545 1562 break;
1546 1563 case NF4SOCK:
1547 1564 vap->va_type = VSOCK;
1548 1565 break;
1549 1566 case NF4FIFO:
1550 1567 vap->va_type = VFIFO;
1551 1568 break;
1552 1569 default:
1553 1570 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1554 1571 return (NULL);
1555 1572 }
1556 1573
1557 1574 /*
1558 1575 * Must specify the mode.
1559 1576 */
1560 1577 if (!(vap->va_mask & AT_MODE)) {
1561 1578 *cs->statusp = resp->status = NFS4ERR_INVAL;
1562 1579 return (NULL);
1563 1580 }
1564 1581
1565 1582 excl = EXCL;
1566 1583
1567 1584 mode = 0;
1568 1585
1569 1586 error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1570 1587 if (error) {
1571 1588 *cs->statusp = resp->status = puterrno4(error);
1572 1589 return (NULL);
1573 1590 }
1574 1591 return (vp);
1575 1592 }
1576 1593
1577 1594 /*
1578 1595 * nfsv4 create is used to create non-regular files. For regular files,
1579 1596 * use nfsv4 open.
1580 1597 */
1581 1598 /* ARGSUSED */
1582 1599 static void
1583 1600 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1584 1601 struct compound_state *cs)
1585 1602 {
1586 1603 CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1587 1604 CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1588 1605 int error;
1589 1606 struct vattr bva, iva, iva2, ava, *vap;
1590 1607 cred_t *cr = cs->cr;
1591 1608 vnode_t *dvp = cs->vp;
1592 1609 vnode_t *vp = NULL;
1593 1610 vnode_t *realvp;
1594 1611 char *nm, *lnm;
1595 1612 uint_t len, llen;
1596 1613 int syncval = 0;
1597 1614 struct nfs4_svgetit_arg sarg;
1598 1615 struct nfs4_ntov_table ntov;
1599 1616 struct statvfs64 sb;
1600 1617 nfsstat4 status;
1601 1618 struct sockaddr *ca;
1602 1619 char *name = NULL;
1603 1620 char *lname = NULL;
1604 1621
1605 1622 DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1606 1623 CREATE4args *, args);
1607 1624
1608 1625 resp->attrset = 0;
1609 1626
1610 1627 if (dvp == NULL) {
1611 1628 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1612 1629 goto out;
1613 1630 }
1614 1631
1615 1632 /*
1616 1633 * If there is an unshared filesystem mounted on this vnode,
1617 1634 * do not allow an object to be created in this directory.
1618 1635 */
1619 1636 if (vn_ismntpt(dvp)) {
1620 1637 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1621 1638 goto out;
1622 1639 }
1623 1640
1624 1641 /* Verify that type is correct */
1625 1642 switch (args->type) {
1626 1643 case NF4LNK:
1627 1644 case NF4BLK:
1628 1645 case NF4CHR:
1629 1646 case NF4SOCK:
1630 1647 case NF4FIFO:
1631 1648 case NF4DIR:
1632 1649 break;
1633 1650 default:
1634 1651 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1635 1652 goto out;
1636 1653 };
1637 1654
1638 1655 if (cs->access == CS_ACCESS_DENIED) {
1639 1656 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1640 1657 goto out;
1641 1658 }
1642 1659 if (dvp->v_type != VDIR) {
1643 1660 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1644 1661 goto out;
1645 1662 }
1646 1663 status = utf8_dir_verify(&args->objname);
1647 1664 if (status != NFS4_OK) {
1648 1665 *cs->statusp = resp->status = status;
1649 1666 goto out;
1650 1667 }
1651 1668
1652 1669 if (rdonly4(req, cs)) {
1653 1670 *cs->statusp = resp->status = NFS4ERR_ROFS;
1654 1671 goto out;
1655 1672 }
1656 1673
1657 1674 /*
1658 1675 * Name of newly created object
1659 1676 */
1660 1677 nm = utf8_to_fn(&args->objname, &len, NULL);
1661 1678 if (nm == NULL) {
1662 1679 *cs->statusp = resp->status = NFS4ERR_INVAL;
1663 1680 goto out;
1664 1681 }
1665 1682
1666 1683 if (len > MAXNAMELEN) {
1667 1684 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1668 1685 kmem_free(nm, len);
1669 1686 goto out;
1670 1687 }
1671 1688
1672 1689 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1673 1690 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1674 1691 MAXPATHLEN + 1);
1675 1692
1676 1693 if (name == NULL) {
1677 1694 *cs->statusp = resp->status = NFS4ERR_INVAL;
1678 1695 kmem_free(nm, len);
1679 1696 goto out;
1680 1697 }
1681 1698
1682 1699 resp->attrset = 0;
1683 1700
1684 1701 sarg.sbp = &sb;
1685 1702 sarg.is_referral = B_FALSE;
1686 1703 nfs4_ntov_table_init(&ntov);
1687 1704
1688 1705 status = do_rfs4_set_attrs(&resp->attrset,
1689 1706 &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
1690 1707
1691 1708 if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1692 1709 status = NFS4ERR_INVAL;
1693 1710
1694 1711 if (status != NFS4_OK) {
1695 1712 *cs->statusp = resp->status = status;
1696 1713 if (name != nm)
1697 1714 kmem_free(name, MAXPATHLEN + 1);
1698 1715 kmem_free(nm, len);
1699 1716 nfs4_ntov_table_free(&ntov, &sarg);
1700 1717 resp->attrset = 0;
1701 1718 goto out;
1702 1719 }
1703 1720
1704 1721 /* Get "before" change value */
1705 1722 bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
1706 1723 error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
1707 1724 if (error) {
1708 1725 *cs->statusp = resp->status = puterrno4(error);
1709 1726 if (name != nm)
1710 1727 kmem_free(name, MAXPATHLEN + 1);
1711 1728 kmem_free(nm, len);
1712 1729 nfs4_ntov_table_free(&ntov, &sarg);
1713 1730 resp->attrset = 0;
1714 1731 goto out;
1715 1732 }
1716 1733 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1717 1734
1718 1735 vap = sarg.vap;
1719 1736
1720 1737 /*
1721 1738 * Set the default initial values for attributes when the parent
1722 1739 * directory does not have the VSUID/VSGID bit set and they have
1723 1740 * not been specified in createattrs.
1724 1741 */
1725 1742 if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
1726 1743 vap->va_uid = crgetuid(cr);
1727 1744 vap->va_mask |= AT_UID;
1728 1745 }
1729 1746 if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
1730 1747 vap->va_gid = crgetgid(cr);
1731 1748 vap->va_mask |= AT_GID;
1732 1749 }
1733 1750
1734 1751 vap->va_mask |= AT_TYPE;
1735 1752 switch (args->type) {
1736 1753 case NF4DIR:
1737 1754 vap->va_type = VDIR;
1738 1755 if ((vap->va_mask & AT_MODE) == 0) {
1739 1756 vap->va_mode = 0700; /* default: owner rwx only */
1740 1757 vap->va_mask |= AT_MODE;
1741 1758 }
1742 1759 error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1743 1760 if (error)
1744 1761 break;
1745 1762
1746 1763 /*
1747 1764 * Get the initial "after" sequence number, if it fails,
1748 1765 * set to zero
1749 1766 */
1750 1767 iva.va_mask = AT_SEQ;
1751 1768 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1752 1769 iva.va_seq = 0;
1753 1770 break;
1754 1771 case NF4LNK:
1755 1772 vap->va_type = VLNK;
1756 1773 if ((vap->va_mask & AT_MODE) == 0) {
1757 1774 vap->va_mode = 0700; /* default: owner rwx only */
1758 1775 vap->va_mask |= AT_MODE;
1759 1776 }
1760 1777
1761 1778 /*
1762 1779 * symlink names must be treated as data
1763 1780 */
1764 1781 lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
1765 1782 &llen, NULL);
1766 1783
1767 1784 if (lnm == NULL) {
1768 1785 *cs->statusp = resp->status = NFS4ERR_INVAL;
1769 1786 if (name != nm)
1770 1787 kmem_free(name, MAXPATHLEN + 1);
1771 1788 kmem_free(nm, len);
1772 1789 nfs4_ntov_table_free(&ntov, &sarg);
1773 1790 resp->attrset = 0;
1774 1791 goto out;
1775 1792 }
1776 1793
1777 1794 if (llen > MAXPATHLEN) {
1778 1795 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1779 1796 if (name != nm)
1780 1797 kmem_free(name, MAXPATHLEN + 1);
1781 1798 kmem_free(nm, len);
1782 1799 kmem_free(lnm, llen);
1783 1800 nfs4_ntov_table_free(&ntov, &sarg);
1784 1801 resp->attrset = 0;
1785 1802 goto out;
1786 1803 }
1787 1804
1788 1805 lname = nfscmd_convname(ca, cs->exi, lnm,
1789 1806 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1790 1807
1791 1808 if (lname == NULL) {
1792 1809 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
1793 1810 if (name != nm)
1794 1811 kmem_free(name, MAXPATHLEN + 1);
1795 1812 kmem_free(nm, len);
1796 1813 kmem_free(lnm, llen);
1797 1814 nfs4_ntov_table_free(&ntov, &sarg);
1798 1815 resp->attrset = 0;
1799 1816 goto out;
1800 1817 }
1801 1818
1802 1819 error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
1803 1820 if (lname != lnm)
1804 1821 kmem_free(lname, MAXPATHLEN + 1);
1805 1822 kmem_free(lnm, llen);
1806 1823 if (error)
1807 1824 break;
1808 1825
1809 1826 /*
1810 1827 * Get the initial "after" sequence number, if it fails,
1811 1828 * set to zero
1812 1829 */
1813 1830 iva.va_mask = AT_SEQ;
1814 1831 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1815 1832 iva.va_seq = 0;
1816 1833
1817 1834 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
1818 1835 NULL, NULL, NULL);
1819 1836 if (error)
1820 1837 break;
1821 1838
1822 1839 /*
1823 1840 * va_seq is not safe over VOP calls, so check it again;
1824 1841 * if it has changed, zero iva.va_seq to force atomic = FALSE.
1825 1842 */
1826 1843 iva2.va_mask = AT_SEQ;
1827 1844 if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
1828 1845 iva2.va_seq != iva.va_seq)
1829 1846 iva.va_seq = 0;
1830 1847 break;
1831 1848 default:
1832 1849 /*
1833 1850 * probably a special file.
1834 1851 */
1835 1852 if ((vap->va_mask & AT_MODE) == 0) {
1836 1853 vap->va_mode = 0600; /* default: owner rw only */
1837 1854 vap->va_mask |= AT_MODE;
1838 1855 }
1839 1856 syncval = FNODSYNC;
1840 1857 /*
1841 1858 * We know this will only generate one VOP call
1842 1859 */
1843 1860 vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
1844 1861
1845 1862 if (vp == NULL) {
1846 1863 if (name != nm)
1847 1864 kmem_free(name, MAXPATHLEN + 1);
1848 1865 kmem_free(nm, len);
1849 1866 nfs4_ntov_table_free(&ntov, &sarg);
1850 1867 resp->attrset = 0;
1851 1868 goto out;
1852 1869 }
1853 1870
1854 1871 /*
1855 1872 * Get the initial "after" sequence number, if it fails,
1856 1873 * set to zero
1857 1874 */
1858 1875 iva.va_mask = AT_SEQ;
1859 1876 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1860 1877 iva.va_seq = 0;
1861 1878
1862 1879 break;
1863 1880 }
1864 1881 if (name != nm)
1865 1882 kmem_free(name, MAXPATHLEN + 1);
1866 1883 kmem_free(nm, len);
1867 1884
1868 1885 if (error) {
1869 1886 *cs->statusp = resp->status = puterrno4(error);
1870 1887 }
1871 1888
1872 1889 /*
1873 1890 * Force modified data and metadata out to stable storage.
1874 1891 */
1875 1892 (void) VOP_FSYNC(dvp, 0, cr, NULL);
1876 1893
1877 1894 if (resp->status != NFS4_OK) {
1878 1895 if (vp != NULL)
1879 1896 VN_RELE(vp);
1880 1897 nfs4_ntov_table_free(&ntov, &sarg);
1881 1898 resp->attrset = 0;
1882 1899 goto out;
1883 1900 }
1884 1901
1885 1902 /*
1886 1903 * Finish setup of cinfo response, "before" value already set.
1887 1904 * Get "after" change value, if it fails, simply return the
1888 1905 * before value.
1889 1906 */
1890 1907 ava.va_mask = AT_CTIME|AT_SEQ;
1891 1908 if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
1892 1909 ava.va_ctime = bva.va_ctime;
1893 1910 ava.va_seq = 0;
1894 1911 }
1895 1912 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
1896 1913
1897 1914 /*
1898 1915 * True verification that object was created with correct
1899 1916 * attrs is impossible. The attrs could have been changed
1900 1917 * immediately after object creation. If attributes did
1901 1918 * not verify, the only recourse for the server is to
1902 1919 * destroy the object. Maybe if some attrs (like gid)
1903 1920 * are set incorrectly, the object should be destroyed;
1904 1921 * however, seems bad as a default policy. Do we really
1905 1922 * want to destroy an object over one of the times not
1906 1923 * verifying correctly? For these reasons, the server
1907 1924 * currently sets bits in attrset for createattrs
1908 1925 * that were set; however, no verification is done.
1909 1926 *
1910 1927 * vmask_to_nmask accounts for vattr bits set on create
1911 1928 * [do_rfs4_set_attrs() only sets resp bits for
1912 1929 * non-vattr/vfs bits.]
1913 1930 * Mask off any bits set by default so as not to return
1914 1931 * more attrset bits than were requested in createattrs
1915 1932 */
1916 1933 nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
1917 1934 resp->attrset &= args->createattrs.attrmask;
1918 1935 nfs4_ntov_table_free(&ntov, &sarg);
1919 1936
1920 1937 error = makefh4(&cs->fh, vp, cs->exi);
1921 1938 if (error) {
1922 1939 *cs->statusp = resp->status = puterrno4(error);
1923 1940 }
1924 1941
1925 1942 /*
1926 1943 * The cinfo.atomic = TRUE only if we got no errors, we have
1927 1944 * non-zero va_seq's, and it has incremented by exactly one
1928 1945 * during the creation and it didn't change during the VOP_LOOKUP
1929 1946 * or VOP_FSYNC.
1930 1947 */
1931 1948 if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
1932 1949 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
1933 1950 resp->cinfo.atomic = TRUE;
1934 1951 else
1935 1952 resp->cinfo.atomic = FALSE;
1936 1953
1937 1954 /*
1938 1955 * Force modified metadata out to stable storage.
1939 1956 *
1940 1957 * if a underlying vp exists, pass it to VOP_FSYNC
1941 1958 */
1942 1959 if (VOP_REALVP(vp, &realvp, NULL) == 0)
1943 1960 (void) VOP_FSYNC(realvp, syncval, cr, NULL);
1944 1961 else
1945 1962 (void) VOP_FSYNC(vp, syncval, cr, NULL);
1946 1963
1947 1964 if (resp->status != NFS4_OK) {
1948 1965 VN_RELE(vp);
1949 1966 goto out;
1950 1967 }
1951 1968 if (cs->vp)
1952 1969 VN_RELE(cs->vp);
1953 1970
1954 1971 cs->vp = vp;
1955 1972 *cs->statusp = resp->status = NFS4_OK;
1956 1973 out:
1957 1974 DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
1958 1975 CREATE4res *, resp);
1959 1976 }
1960 1977
1961 1978 /*ARGSUSED*/
1962 1979 static void
1963 1980 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1964 1981 struct compound_state *cs)
1965 1982 {
1966 1983 DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1967 1984 DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1968 1985
1969 1986 rfs4_op_inval(argop, resop, req, cs);
1970 1987
1971 1988 DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1972 1989 DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1973 1990 }
1974 1991
1975 1992 /*ARGSUSED*/
1976 1993 static void
1977 1994 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1978 1995 struct compound_state *cs)
1979 1996 {
1980 1997 DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1981 1998 DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1982 1999 rfs4_deleg_state_t *dsp;
1983 2000 nfsstat4 status;
1984 2001
1985 2002 DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
1986 2003 DELEGRETURN4args *, args);
1987 2004
1988 2005 status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
1989 2006 resp->status = *cs->statusp = status;
1990 2007 if (status != NFS4_OK)
1991 2008 goto out;
1992 2009
1993 2010 /* Ensure specified filehandle matches */
1994 2011 if (cs->vp != dsp->rds_finfo->rf_vp) {
1995 2012 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
1996 2013 } else
1997 2014 rfs4_return_deleg(dsp, FALSE);
1998 2015
1999 2016 rfs4_update_lease(dsp->rds_client);
2000 2017
2001 2018 rfs4_deleg_state_rele(dsp);
2002 2019 out:
2003 2020 DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
2004 2021 DELEGRETURN4res *, resp);
2005 2022 }
2006 2023
2007 2024 /*
2008 2025 * Check to see if a given "flavor" is an explicitly shared flavor.
2009 2026 * The assumption of this routine is the "flavor" is already a valid
2010 2027 * flavor in the secinfo list of "exi".
2011 2028 *
2012 2029 * e.g.
2013 2030 * # share -o sec=flavor1 /export
2014 2031 * # share -o sec=flavor2 /export/home
2015 2032 *
2016 2033 * flavor2 is not an explicitly shared flavor for /export,
2017 2034 * however it is in the secinfo list for /export thru the
2018 2035 * server namespace setup.
2019 2036 */
2020 2037 int
2021 2038 is_exported_sec(int flavor, struct exportinfo *exi)
2022 2039 {
2023 2040 int i;
2024 2041 struct secinfo *sp;
2025 2042
2026 2043 sp = exi->exi_export.ex_secinfo;
2027 2044 for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2028 2045 if (flavor == sp[i].s_secinfo.sc_nfsnum ||
2029 2046 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
2030 2047 return (SEC_REF_EXPORTED(&sp[i]));
2031 2048 }
2032 2049 }
2033 2050
2034 2051 /* Should not reach this point based on the assumption */
2035 2052 return (0);
2036 2053 }
2037 2054
2038 2055 /*
2039 2056 * Check if the security flavor used in the request matches what is
2040 2057 * required at the export point or at the root pseudo node (exi_root).
2041 2058 *
2042 2059 * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
2043 2060 *
2044 2061 */
2045 2062 static int
2046 2063 secinfo_match_or_authnone(struct compound_state *cs)
2047 2064 {
2048 2065 int i;
2049 2066 struct secinfo *sp;
2050 2067
2051 2068 /*
2052 2069 * Check cs->nfsflavor (from the request) against
2053 2070 * the current export data in cs->exi.
2054 2071 */
2055 2072 sp = cs->exi->exi_export.ex_secinfo;
2056 2073 for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
2057 2074 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
2058 2075 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
2059 2076 return (1);
2060 2077 }
2061 2078
2062 2079 return (0);
2063 2080 }
2064 2081
2065 2082 /*
2066 2083 * Check the access authority for the client and return the correct error.
2067 2084 */
2068 2085 nfsstat4
2069 2086 call_checkauth4(struct compound_state *cs, struct svc_req *req)
2070 2087 {
2071 2088 int authres;
2072 2089
2073 2090 /*
2074 2091 * First, check if the security flavor used in the request
2075 2092 * are among the flavors set in the server namespace.
2076 2093 */
2077 2094 if (!secinfo_match_or_authnone(cs)) {
2078 2095 *cs->statusp = NFS4ERR_WRONGSEC;
2079 2096 return (*cs->statusp);
2080 2097 }
2081 2098
2082 2099 authres = checkauth4(cs, req);
2083 2100
2084 2101 if (authres > 0) {
2085 2102 *cs->statusp = NFS4_OK;
2086 2103 if (! (cs->access & CS_ACCESS_LIMITED))
2087 2104 cs->access = CS_ACCESS_OK;
2088 2105 } else if (authres == 0) {
2089 2106 *cs->statusp = NFS4ERR_ACCESS;
2090 2107 } else if (authres == -2) {
2091 2108 *cs->statusp = NFS4ERR_WRONGSEC;
2092 2109 } else {
2093 2110 *cs->statusp = NFS4ERR_DELAY;
2094 2111 }
2095 2112 return (*cs->statusp);
2096 2113 }
2097 2114
2098 2115 /*
2099 2116 * bitmap4_to_attrmask is called by getattr and readdir.
2100 2117 * It sets up the vattr mask and determines whether vfsstat call is needed
2101 2118 * based on the input bitmap.
2102 2119 * Returns nfsv4 status.
2103 2120 */
2104 2121 static nfsstat4
2105 2122 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2106 2123 {
2107 2124 int i;
2108 2125 uint_t va_mask;
2109 2126 struct statvfs64 *sbp = sargp->sbp;
2110 2127
2111 2128 sargp->sbp = NULL;
2112 2129 sargp->flag = 0;
2113 2130 sargp->rdattr_error = NFS4_OK;
2114 2131 sargp->mntdfid_set = FALSE;
2115 2132 if (sargp->cs->vp)
2116 2133 sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2117 2134 FH4_ATTRDIR | FH4_NAMEDATTR);
2118 2135 else
2119 2136 sargp->xattr = 0;
2120 2137
2121 2138 /*
2122 2139 * Set rdattr_error_req to true if return error per
2123 2140 * failed entry rather than fail the readdir.
2124 2141 */
2125 2142 if (breq & FATTR4_RDATTR_ERROR_MASK)
2126 2143 sargp->rdattr_error_req = 1;
2127 2144 else
2128 2145 sargp->rdattr_error_req = 0;
2129 2146
2130 2147 /*
2131 2148 * generate the va_mask
2132 2149 * Handle the easy cases first
2133 2150 */
2134 2151 switch (breq) {
2135 2152 case NFS4_NTOV_ATTR_MASK:
2136 2153 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2137 2154 return (NFS4_OK);
2138 2155
2139 2156 case NFS4_FS_ATTR_MASK:
2140 2157 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2141 2158 sargp->sbp = sbp;
2142 2159 return (NFS4_OK);
2143 2160
2144 2161 case NFS4_NTOV_ATTR_CACHE_MASK:
2145 2162 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2146 2163 return (NFS4_OK);
2147 2164
2148 2165 case FATTR4_LEASE_TIME_MASK:
2149 2166 sargp->vap->va_mask = 0;
2150 2167 return (NFS4_OK);
2151 2168
2152 2169 default:
2153 2170 va_mask = 0;
2154 2171 for (i = 0; i < nfs4_ntov_map_size; i++) {
2155 2172 if ((breq & nfs4_ntov_map[i].fbit) &&
2156 2173 nfs4_ntov_map[i].vbit)
2157 2174 va_mask |= nfs4_ntov_map[i].vbit;
2158 2175 }
2159 2176
2160 2177 /*
2161 2178 * Check is vfsstat is needed
2162 2179 */
2163 2180 if (breq & NFS4_FS_ATTR_MASK)
2164 2181 sargp->sbp = sbp;
2165 2182
2166 2183 sargp->vap->va_mask = va_mask;
2167 2184 return (NFS4_OK);
2168 2185 }
2169 2186 /* NOTREACHED */
2170 2187 }
2171 2188
2172 2189 /*
2173 2190 * bitmap4_get_sysattrs is called by getattr and readdir.
2174 2191 * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2175 2192 * Returns nfsv4 status.
2176 2193 */
2177 2194 static nfsstat4
2178 2195 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2179 2196 {
2180 2197 int error;
2181 2198 struct compound_state *cs = sargp->cs;
2182 2199 vnode_t *vp = cs->vp;
2183 2200
2184 2201 if (sargp->sbp != NULL) {
2185 2202 if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2186 2203 sargp->sbp = NULL; /* to identify error */
2187 2204 return (puterrno4(error));
2188 2205 }
2189 2206 }
2190 2207
2191 2208 return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2192 2209 }
2193 2210
2194 2211 static void
2195 2212 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2196 2213 {
2197 2214 ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2198 2215 KM_SLEEP);
2199 2216 ntovp->attrcnt = 0;
2200 2217 ntovp->vfsstat = FALSE;
2201 2218 }
2202 2219
2203 2220 static void
2204 2221 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2205 2222 struct nfs4_svgetit_arg *sargp)
2206 2223 {
2207 2224 int i;
2208 2225 union nfs4_attr_u *na;
2209 2226 uint8_t *amap;
2210 2227
2211 2228 /*
2212 2229 * XXX Should do the same checks for whether the bit is set
2213 2230 */
2214 2231 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2215 2232 i < ntovp->attrcnt; i++, na++, amap++) {
2216 2233 (void) (*nfs4_ntov_map[*amap].sv_getit)(
2217 2234 NFS4ATTR_FREEIT, sargp, na);
2218 2235 }
2219 2236 if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2220 2237 /*
2221 2238 * xdr_free for getattr will be done later
2222 2239 */
2223 2240 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2224 2241 i < ntovp->attrcnt; i++, na++, amap++) {
2225 2242 xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2226 2243 }
2227 2244 }
2228 2245 kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2229 2246 }
2230 2247
2231 2248 /*
2232 2249 * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2233 2250 */
2234 2251 static nfsstat4
2235 2252 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2236 2253 struct nfs4_svgetit_arg *sargp)
2237 2254 {
2238 2255 int error = 0;
2239 2256 int i, k;
2240 2257 struct nfs4_ntov_table ntov;
2241 2258 XDR xdr;
2242 2259 ulong_t xdr_size;
2243 2260 char *xdr_attrs;
2244 2261 nfsstat4 status = NFS4_OK;
2245 2262 nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2246 2263 union nfs4_attr_u *na;
2247 2264 uint8_t *amap;
2248 2265
2249 2266 sargp->op = NFS4ATTR_GETIT;
2250 2267 sargp->flag = 0;
2251 2268
2252 2269 fattrp->attrmask = 0;
2253 2270 /* if no bits requested, then return empty fattr4 */
2254 2271 if (breq == 0) {
2255 2272 fattrp->attrlist4_len = 0;
2256 2273 fattrp->attrlist4 = NULL;
2257 2274 return (NFS4_OK);
2258 2275 }
2259 2276
2260 2277 /*
2261 2278 * return NFS4ERR_INVAL when client requests write-only attrs
2262 2279 */
2263 2280 if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2264 2281 return (NFS4ERR_INVAL);
2265 2282
2266 2283 nfs4_ntov_table_init(&ntov);
2267 2284 na = ntov.na;
2268 2285 amap = ntov.amap;
2269 2286
2270 2287 /*
2271 2288 * Now loop to get or verify the attrs
2272 2289 */
2273 2290 for (i = 0; i < nfs4_ntov_map_size; i++) {
2274 2291 if (breq & nfs4_ntov_map[i].fbit) {
2275 2292 if ((*nfs4_ntov_map[i].sv_getit)(
2276 2293 NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2277 2294
2278 2295 error = (*nfs4_ntov_map[i].sv_getit)(
2279 2296 NFS4ATTR_GETIT, sargp, na);
2280 2297
2281 2298 /*
2282 2299 * Possible error values:
2283 2300 * >0 if sv_getit failed to
2284 2301 * get the attr; 0 if succeeded;
2285 2302 * <0 if rdattr_error and the
2286 2303 * attribute cannot be returned.
2287 2304 */
2288 2305 if (error && !(sargp->rdattr_error_req))
2289 2306 goto done;
2290 2307 /*
2291 2308 * If error then just for entry
2292 2309 */
2293 2310 if (error == 0) {
2294 2311 fattrp->attrmask |=
2295 2312 nfs4_ntov_map[i].fbit;
2296 2313 *amap++ =
2297 2314 (uint8_t)nfs4_ntov_map[i].nval;
2298 2315 na++;
2299 2316 (ntov.attrcnt)++;
2300 2317 } else if ((error > 0) &&
2301 2318 (sargp->rdattr_error == NFS4_OK)) {
2302 2319 sargp->rdattr_error = puterrno4(error);
2303 2320 }
2304 2321 error = 0;
2305 2322 }
2306 2323 }
2307 2324 }
2308 2325
2309 2326 /*
2310 2327 * If rdattr_error was set after the return value for it was assigned,
2311 2328 * update it.
2312 2329 */
2313 2330 if (prev_rdattr_error != sargp->rdattr_error) {
2314 2331 na = ntov.na;
2315 2332 amap = ntov.amap;
2316 2333 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2317 2334 k = *amap;
2318 2335 if (k < FATTR4_RDATTR_ERROR) {
2319 2336 continue;
2320 2337 }
2321 2338 if ((k == FATTR4_RDATTR_ERROR) &&
2322 2339 ((*nfs4_ntov_map[k].sv_getit)(
2323 2340 NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2324 2341
2325 2342 (void) (*nfs4_ntov_map[k].sv_getit)(
2326 2343 NFS4ATTR_GETIT, sargp, na);
2327 2344 }
2328 2345 break;
2329 2346 }
2330 2347 }
2331 2348
2332 2349 xdr_size = 0;
2333 2350 na = ntov.na;
2334 2351 amap = ntov.amap;
2335 2352 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2336 2353 xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2337 2354 }
2338 2355
2339 2356 fattrp->attrlist4_len = xdr_size;
2340 2357 if (xdr_size) {
2341 2358 /* freed by rfs4_op_getattr_free() */
2342 2359 fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2343 2360
2344 2361 xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2345 2362
2346 2363 na = ntov.na;
2347 2364 amap = ntov.amap;
2348 2365 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2349 2366 if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2350 2367 DTRACE_PROBE1(nfss__e__getattr4_encfail,
2351 2368 int, *amap);
2352 2369 status = NFS4ERR_SERVERFAULT;
2353 2370 break;
2354 2371 }
2355 2372 }
2356 2373 /* xdrmem_destroy(&xdrs); */ /* NO-OP */
2357 2374 } else {
2358 2375 fattrp->attrlist4 = NULL;
2359 2376 }
2360 2377 done:
2361 2378
2362 2379 nfs4_ntov_table_free(&ntov, sargp);
2363 2380
2364 2381 if (error != 0)
2365 2382 status = puterrno4(error);
2366 2383
2367 2384 return (status);
2368 2385 }
2369 2386
2370 2387 /* ARGSUSED */
2371 2388 static void
2372 2389 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2373 2390 struct compound_state *cs)
2374 2391 {
2375 2392 GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2376 2393 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2377 2394 struct nfs4_svgetit_arg sarg;
2378 2395 struct statvfs64 sb;
2379 2396 nfsstat4 status;
2380 2397
2381 2398 DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2382 2399 GETATTR4args *, args);
2383 2400
2384 2401 if (cs->vp == NULL) {
2385 2402 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2386 2403 goto out;
2387 2404 }
2388 2405
2389 2406 if (cs->access == CS_ACCESS_DENIED) {
2390 2407 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2391 2408 goto out;
2392 2409 }
2393 2410
2394 2411 sarg.sbp = &sb;
2395 2412 sarg.cs = cs;
2396 2413 sarg.is_referral = B_FALSE;
2397 2414
2398 2415 status = bitmap4_to_attrmask(args->attr_request, &sarg);
2399 2416 if (status == NFS4_OK) {
2400 2417
2401 2418 status = bitmap4_get_sysattrs(&sarg);
2402 2419 if (status == NFS4_OK) {
2403 2420
2404 2421 /* Is this a referral? */
2405 2422 if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2406 2423 /* Older V4 Solaris client sees a link */
2407 2424 if (client_is_downrev(req))
2408 2425 sarg.vap->va_type = VLNK;
2409 2426 else
2410 2427 sarg.is_referral = B_TRUE;
2411 2428 }
2412 2429
2413 2430 status = do_rfs4_op_getattr(args->attr_request,
2414 2431 &resp->obj_attributes, &sarg);
2415 2432 }
2416 2433 }
2417 2434 *cs->statusp = resp->status = status;
2418 2435 out:
2419 2436 DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2420 2437 GETATTR4res *, resp);
2421 2438 }
2422 2439
2423 2440 static void
2424 2441 rfs4_op_getattr_free(nfs_resop4 *resop)
2425 2442 {
2426 2443 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2427 2444
2428 2445 nfs4_fattr4_free(&resp->obj_attributes);
2429 2446 }
2430 2447
2431 2448 /* ARGSUSED */
2432 2449 static void
2433 2450 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2434 2451 struct compound_state *cs)
2435 2452 {
2436 2453 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2437 2454
2438 2455 DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2439 2456
2440 2457 if (cs->vp == NULL) {
2441 2458 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2442 2459 goto out;
2443 2460 }
2444 2461 if (cs->access == CS_ACCESS_DENIED) {
2445 2462 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2446 2463 goto out;
2447 2464 }
2448 2465
2449 2466 /* check for reparse point at the share point */
2450 2467 if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2451 2468 /* it's all bad */
2452 2469 cs->exi->exi_moved = 1;
2453 2470 *cs->statusp = resp->status = NFS4ERR_MOVED;
2454 2471 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2455 2472 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2456 2473 return;
2457 2474 }
2458 2475
2459 2476 /* check for reparse point at vp */
2460 2477 if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2461 2478 /* it's not all bad */
2462 2479 *cs->statusp = resp->status = NFS4ERR_MOVED;
2463 2480 DTRACE_PROBE2(nfs4serv__func__referral__moved,
2464 2481 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2465 2482 return;
2466 2483 }
2467 2484
2468 2485 resp->object.nfs_fh4_val =
2469 2486 kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2470 2487 nfs_fh4_copy(&cs->fh, &resp->object);
2471 2488 *cs->statusp = resp->status = NFS4_OK;
2472 2489 out:
2473 2490 DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2474 2491 GETFH4res *, resp);
2475 2492 }
2476 2493
2477 2494 static void
2478 2495 rfs4_op_getfh_free(nfs_resop4 *resop)
2479 2496 {
2480 2497 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2481 2498
2482 2499 if (resp->status == NFS4_OK &&
2483 2500 resp->object.nfs_fh4_val != NULL) {
2484 2501 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2485 2502 resp->object.nfs_fh4_val = NULL;
2486 2503 resp->object.nfs_fh4_len = 0;
2487 2504 }
2488 2505 }
2489 2506
2490 2507 /*
2491 2508 * illegal: args: void
2492 2509 * res : status (NFS4ERR_OP_ILLEGAL)
2493 2510 */
2494 2511 /* ARGSUSED */
2495 2512 static void
2496 2513 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2497 2514 struct svc_req *req, struct compound_state *cs)
2498 2515 {
2499 2516 ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2500 2517
2501 2518 resop->resop = OP_ILLEGAL;
2502 2519 *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2503 2520 }
2504 2521
2505 2522 /*
2506 2523 * link: args: SAVED_FH: file, CURRENT_FH: target directory
2507 2524 * res: status. If success - CURRENT_FH unchanged, return change_info
2508 2525 */
2509 2526 /* ARGSUSED */
2510 2527 static void
2511 2528 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2512 2529 struct compound_state *cs)
2513 2530 {
2514 2531 LINK4args *args = &argop->nfs_argop4_u.oplink;
2515 2532 LINK4res *resp = &resop->nfs_resop4_u.oplink;
2516 2533 int error;
2517 2534 vnode_t *vp;
2518 2535 vnode_t *dvp;
2519 2536 struct vattr bdva, idva, adva;
2520 2537 char *nm;
2521 2538 uint_t len;
2522 2539 struct sockaddr *ca;
2523 2540 char *name = NULL;
2524 2541 nfsstat4 status;
2525 2542
2526 2543 DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2527 2544 LINK4args *, args);
2528 2545
2529 2546 /* SAVED_FH: source object */
2530 2547 vp = cs->saved_vp;
2531 2548 if (vp == NULL) {
2532 2549 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2533 2550 goto out;
2534 2551 }
2535 2552
2536 2553 /* CURRENT_FH: target directory */
2537 2554 dvp = cs->vp;
2538 2555 if (dvp == NULL) {
2539 2556 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2540 2557 goto out;
2541 2558 }
2542 2559
2543 2560 /*
2544 2561 * If there is a non-shared filesystem mounted on this vnode,
2545 2562 * do not allow to link any file in this directory.
2546 2563 */
2547 2564 if (vn_ismntpt(dvp)) {
2548 2565 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2549 2566 goto out;
2550 2567 }
2551 2568
2552 2569 if (cs->access == CS_ACCESS_DENIED) {
2553 2570 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2554 2571 goto out;
2555 2572 }
2556 2573
2557 2574 /* Check source object's type validity */
2558 2575 if (vp->v_type == VDIR) {
2559 2576 *cs->statusp = resp->status = NFS4ERR_ISDIR;
2560 2577 goto out;
2561 2578 }
2562 2579
2563 2580 /* Check target directory's type */
2564 2581 if (dvp->v_type != VDIR) {
2565 2582 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2566 2583 goto out;
2567 2584 }
2568 2585
2569 2586 if (cs->saved_exi != cs->exi) {
2570 2587 *cs->statusp = resp->status = NFS4ERR_XDEV;
2571 2588 goto out;
2572 2589 }
2573 2590
2574 2591 status = utf8_dir_verify(&args->newname);
2575 2592 if (status != NFS4_OK) {
2576 2593 *cs->statusp = resp->status = status;
2577 2594 goto out;
2578 2595 }
2579 2596
2580 2597 nm = utf8_to_fn(&args->newname, &len, NULL);
2581 2598 if (nm == NULL) {
2582 2599 *cs->statusp = resp->status = NFS4ERR_INVAL;
2583 2600 goto out;
2584 2601 }
2585 2602
2586 2603 if (len > MAXNAMELEN) {
2587 2604 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2588 2605 kmem_free(nm, len);
2589 2606 goto out;
2590 2607 }
2591 2608
2592 2609 if (rdonly4(req, cs)) {
2593 2610 *cs->statusp = resp->status = NFS4ERR_ROFS;
2594 2611 kmem_free(nm, len);
2595 2612 goto out;
2596 2613 }
2597 2614
2598 2615 /* Get "before" change value */
2599 2616 bdva.va_mask = AT_CTIME|AT_SEQ;
2600 2617 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2601 2618 if (error) {
2602 2619 *cs->statusp = resp->status = puterrno4(error);
2603 2620 kmem_free(nm, len);
2604 2621 goto out;
2605 2622 }
2606 2623
2607 2624 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2608 2625 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2609 2626 MAXPATHLEN + 1);
2610 2627
2611 2628 if (name == NULL) {
2612 2629 *cs->statusp = resp->status = NFS4ERR_INVAL;
2613 2630 kmem_free(nm, len);
2614 2631 goto out;
2615 2632 }
2616 2633
2617 2634 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2618 2635
2619 2636 error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2620 2637
2621 2638 if (nm != name)
2622 2639 kmem_free(name, MAXPATHLEN + 1);
2623 2640 kmem_free(nm, len);
2624 2641
2625 2642 /*
2626 2643 * Get the initial "after" sequence number, if it fails, set to zero
2627 2644 */
2628 2645 idva.va_mask = AT_SEQ;
2629 2646 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2630 2647 idva.va_seq = 0;
2631 2648
2632 2649 /*
2633 2650 * Force modified data and metadata out to stable storage.
2634 2651 */
2635 2652 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2636 2653 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2637 2654
2638 2655 if (error) {
2639 2656 *cs->statusp = resp->status = puterrno4(error);
2640 2657 goto out;
2641 2658 }
2642 2659
2643 2660 /*
2644 2661 * Get "after" change value, if it fails, simply return the
2645 2662 * before value.
2646 2663 */
2647 2664 adva.va_mask = AT_CTIME|AT_SEQ;
2648 2665 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2649 2666 adva.va_ctime = bdva.va_ctime;
2650 2667 adva.va_seq = 0;
2651 2668 }
2652 2669
2653 2670 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2654 2671
2655 2672 /*
2656 2673 * The cinfo.atomic = TRUE only if we have
2657 2674 * non-zero va_seq's, and it has incremented by exactly one
2658 2675 * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2659 2676 */
2660 2677 if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2661 2678 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2662 2679 resp->cinfo.atomic = TRUE;
2663 2680 else
2664 2681 resp->cinfo.atomic = FALSE;
2665 2682
2666 2683 *cs->statusp = resp->status = NFS4_OK;
2667 2684 out:
2668 2685 DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2669 2686 LINK4res *, resp);
2670 2687 }
2671 2688
2672 2689 /*
2673 2690 * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2674 2691 */
2675 2692
2676 2693 /* ARGSUSED */
2677 2694 static nfsstat4
2678 2695 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2679 2696 {
2680 2697 int error;
2681 2698 int different_export = 0;
2682 2699 vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2683 2700 struct exportinfo *exi = NULL, *pre_exi = NULL;
2684 2701 nfsstat4 stat;
2685 2702 fid_t fid;
2686 2703 int attrdir, dotdot, walk;
2687 2704 bool_t is_newvp = FALSE;
2688 2705
2689 2706 if (cs->vp->v_flag & V_XATTRDIR) {
2690 2707 attrdir = 1;
2691 2708 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2692 2709 } else {
2693 2710 attrdir = 0;
2694 2711 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
|
↓ open down ↓ |
1610 lines elided |
↑ open up ↑ |
2695 2712 }
2696 2713
2697 2714 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2698 2715
2699 2716 /*
2700 2717 * If dotdotting, then need to check whether it's
2701 2718 * above the root of a filesystem, or above an
2702 2719 * export point.
2703 2720 */
2704 2721 if (dotdot) {
2705 -
2722 + ASSERT(cs->exi != NULL);
2723 + ASSERT3U(cs->exi->exi_zoneid, ==, curzone->zone_id);
2706 2724 /*
2707 2725 * If dotdotting at the root of a filesystem, then
2708 2726 * need to traverse back to the mounted-on filesystem
2709 2727 * and do the dotdot lookup there.
2710 2728 */
2711 - if (cs->vp->v_flag & VROOT) {
2729 + if ((cs->vp->v_flag & VROOT) || VN_IS_CURZONEROOT(cs->vp)) {
2712 2730
2713 2731 /*
2714 2732 * If at the system root, then can
2715 2733 * go up no further.
2716 2734 */
2717 2735 if (VN_CMP(cs->vp, ZONE_ROOTVP()))
2718 2736 return (puterrno4(ENOENT));
2719 2737
2720 2738 /*
2721 2739 * Traverse back to the mounted-on filesystem
2722 2740 */
2723 2741 cs->vp = untraverse(cs->vp);
2724 2742
2725 2743 /*
2726 2744 * Set the different_export flag so we remember
2727 2745 * to pick up a new exportinfo entry for
2728 2746 * this new filesystem.
2729 2747 */
2730 2748 different_export = 1;
2731 2749 } else {
2732 2750
2733 2751 /*
2734 2752 * If dotdotting above an export point then set
2735 2753 * the different_export to get new export info.
2736 2754 */
2737 2755 different_export = nfs_exported(cs->exi, cs->vp);
2738 2756 }
2739 2757 }
2740 2758
2741 2759 error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2742 2760 NULL, NULL, NULL);
2743 2761 if (error)
2744 2762 return (puterrno4(error));
2745 2763
2746 2764 /*
2747 2765 * If the vnode is in a pseudo filesystem, check whether it is visible.
2748 2766 *
2749 2767 * XXX if the vnode is a symlink and it is not visible in
2750 2768 * a pseudo filesystem, return ENOENT (not following symlink).
2751 2769 * V4 client can not mount such symlink. This is a regression
2752 2770 * from V2/V3.
2753 2771 *
2754 2772 * In the same exported filesystem, if the security flavor used
2755 2773 * is not an explicitly shared flavor, limit the view to the visible
2756 2774 * list entries only. This is not a WRONGSEC case because it's already
2757 2775 * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2758 2776 */
2759 2777 if (!different_export &&
2760 2778 (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2761 2779 cs->access & CS_ACCESS_LIMITED)) {
2762 2780 if (! nfs_visible(cs->exi, vp, &different_export)) {
2763 2781 VN_RELE(vp);
2764 2782 return (puterrno4(ENOENT));
2765 2783 }
2766 2784 }
2767 2785
2768 2786 /*
2769 2787 * If it's a mountpoint, then traverse it.
2770 2788 */
2771 2789 if (vn_ismntpt(vp)) {
2772 2790 pre_exi = cs->exi; /* save pre-traversed exportinfo */
2773 2791 pre_tvp = vp; /* save pre-traversed vnode */
2774 2792
2775 2793 /*
2776 2794 * hold pre_tvp to counteract rele by traverse. We will
2777 2795 * need pre_tvp below if checkexport4 fails
2778 2796 */
2779 2797 VN_HOLD(pre_tvp);
2780 2798 if ((error = traverse(&vp)) != 0) {
2781 2799 VN_RELE(vp);
2782 2800 VN_RELE(pre_tvp);
2783 2801 return (puterrno4(error));
2784 2802 }
2785 2803 different_export = 1;
2786 2804 } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2787 2805 /*
2788 2806 * The vfsp comparison is to handle the case where
2789 2807 * a LOFS mount is shared. lo_lookup traverses mount points,
2790 2808 * and NFS is unaware of local fs transitions because
2791 2809 * v_vfsmountedhere isn't set. For this special LOFS case,
2792 2810 * the dir and the obj returned by lookup will have different
2793 2811 * vfs ptrs.
2794 2812 */
2795 2813 different_export = 1;
2796 2814 }
2797 2815
2798 2816 if (different_export) {
2799 2817
2800 2818 bzero(&fid, sizeof (fid));
2801 2819 fid.fid_len = MAXFIDSZ;
2802 2820 error = vop_fid_pseudo(vp, &fid);
2803 2821 if (error) {
2804 2822 VN_RELE(vp);
2805 2823 if (pre_tvp)
2806 2824 VN_RELE(pre_tvp);
2807 2825 return (puterrno4(error));
2808 2826 }
2809 2827
2810 2828 if (dotdot)
2811 2829 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2812 2830 else
2813 2831 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2814 2832
2815 2833 if (exi == NULL) {
2816 2834 if (pre_tvp) {
2817 2835 /*
2818 2836 * If this vnode is a mounted-on vnode,
2819 2837 * but the mounted-on file system is not
2820 2838 * exported, send back the filehandle for
2821 2839 * the mounted-on vnode, not the root of
2822 2840 * the mounted-on file system.
2823 2841 */
2824 2842 VN_RELE(vp);
2825 2843 vp = pre_tvp;
2826 2844 exi = pre_exi;
2827 2845 } else {
2828 2846 VN_RELE(vp);
2829 2847 return (puterrno4(EACCES));
2830 2848 }
2831 2849 } else if (pre_tvp) {
2832 2850 /* we're done with pre_tvp now. release extra hold */
2833 2851 VN_RELE(pre_tvp);
2834 2852 }
2835 2853
2836 2854 cs->exi = exi;
2837 2855
2838 2856 /*
2839 2857 * Now we do a checkauth4. The reason is that
2840 2858 * this client/user may not have access to the new
2841 2859 * exported file system, and if they do,
2842 2860 * the client/user may be mapped to a different uid.
2843 2861 *
2844 2862 * We start with a new cr, because the checkauth4 done
2845 2863 * in the PUT*FH operation over wrote the cred's uid,
2846 2864 * gid, etc, and we want the real thing before calling
2847 2865 * checkauth4()
2848 2866 */
2849 2867 crfree(cs->cr);
2850 2868 cs->cr = crdup(cs->basecr);
2851 2869
2852 2870 oldvp = cs->vp;
2853 2871 cs->vp = vp;
2854 2872 is_newvp = TRUE;
2855 2873
2856 2874 stat = call_checkauth4(cs, req);
2857 2875 if (stat != NFS4_OK) {
2858 2876 VN_RELE(cs->vp);
2859 2877 cs->vp = oldvp;
2860 2878 return (stat);
2861 2879 }
2862 2880 }
2863 2881
2864 2882 /*
2865 2883 * After various NFS checks, do a label check on the path
2866 2884 * component. The label on this path should either be the
2867 2885 * global zone's label or a zone's label. We are only
2868 2886 * interested in the zone's label because exported files
2869 2887 * in global zone is accessible (though read-only) to
2870 2888 * clients. The exportability/visibility check is already
2871 2889 * done before reaching this code.
2872 2890 */
2873 2891 if (is_system_labeled()) {
2874 2892 bslabel_t *clabel;
2875 2893
2876 2894 ASSERT(req->rq_label != NULL);
2877 2895 clabel = req->rq_label;
2878 2896 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
2879 2897 "got client label from request(1)", struct svc_req *, req);
2880 2898
2881 2899 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2882 2900 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2883 2901 cs->exi)) {
2884 2902 error = EACCES;
2885 2903 goto err_out;
2886 2904 }
2887 2905 } else {
2888 2906 /*
2889 2907 * We grant access to admin_low label clients
2890 2908 * only if the client is trusted, i.e. also
2891 2909 * running Solaris Trusted Extension.
2892 2910 */
2893 2911 struct sockaddr *ca;
2894 2912 int addr_type;
2895 2913 void *ipaddr;
2896 2914 tsol_tpc_t *tp;
2897 2915
2898 2916 ca = (struct sockaddr *)svc_getrpccaller(
2899 2917 req->rq_xprt)->buf;
2900 2918 if (ca->sa_family == AF_INET) {
2901 2919 addr_type = IPV4_VERSION;
2902 2920 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
2903 2921 } else if (ca->sa_family == AF_INET6) {
2904 2922 addr_type = IPV6_VERSION;
2905 2923 ipaddr = &((struct sockaddr_in6 *)
2906 2924 ca)->sin6_addr;
2907 2925 }
2908 2926 tp = find_tpc(ipaddr, addr_type, B_FALSE);
2909 2927 if (tp == NULL || tp->tpc_tp.tp_doi !=
2910 2928 l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
2911 2929 SUN_CIPSO) {
2912 2930 if (tp != NULL)
2913 2931 TPC_RELE(tp);
2914 2932 error = EACCES;
2915 2933 goto err_out;
2916 2934 }
2917 2935 TPC_RELE(tp);
2918 2936 }
2919 2937 }
2920 2938
2921 2939 error = makefh4(&cs->fh, vp, cs->exi);
2922 2940
2923 2941 err_out:
2924 2942 if (error) {
2925 2943 if (is_newvp) {
2926 2944 VN_RELE(cs->vp);
2927 2945 cs->vp = oldvp;
2928 2946 } else
2929 2947 VN_RELE(vp);
2930 2948 return (puterrno4(error));
2931 2949 }
2932 2950
2933 2951 if (!is_newvp) {
2934 2952 if (cs->vp)
2935 2953 VN_RELE(cs->vp);
2936 2954 cs->vp = vp;
2937 2955 } else if (oldvp)
2938 2956 VN_RELE(oldvp);
2939 2957
2940 2958 /*
2941 2959 * if did lookup on attrdir and didn't lookup .., set named
2942 2960 * attr fh flag
2943 2961 */
2944 2962 if (attrdir && ! dotdot)
2945 2963 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2946 2964
2947 2965 /* Assume false for now, open proc will set this */
2948 2966 cs->mandlock = FALSE;
2949 2967
2950 2968 return (NFS4_OK);
2951 2969 }
2952 2970
2953 2971 /* ARGSUSED */
2954 2972 static void
2955 2973 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2956 2974 struct compound_state *cs)
2957 2975 {
2958 2976 LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2959 2977 LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2960 2978 char *nm;
2961 2979 uint_t len;
2962 2980 struct sockaddr *ca;
2963 2981 char *name = NULL;
2964 2982 nfsstat4 status;
2965 2983
2966 2984 DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2967 2985 LOOKUP4args *, args);
2968 2986
2969 2987 if (cs->vp == NULL) {
2970 2988 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2971 2989 goto out;
2972 2990 }
2973 2991
2974 2992 if (cs->vp->v_type == VLNK) {
2975 2993 *cs->statusp = resp->status = NFS4ERR_SYMLINK;
2976 2994 goto out;
2977 2995 }
2978 2996
2979 2997 if (cs->vp->v_type != VDIR) {
2980 2998 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2981 2999 goto out;
2982 3000 }
2983 3001
2984 3002 status = utf8_dir_verify(&args->objname);
2985 3003 if (status != NFS4_OK) {
2986 3004 *cs->statusp = resp->status = status;
2987 3005 goto out;
2988 3006 }
2989 3007
2990 3008 nm = utf8_to_str(&args->objname, &len, NULL);
2991 3009 if (nm == NULL) {
2992 3010 *cs->statusp = resp->status = NFS4ERR_INVAL;
2993 3011 goto out;
2994 3012 }
2995 3013
2996 3014 if (len > MAXNAMELEN) {
2997 3015 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2998 3016 kmem_free(nm, len);
2999 3017 goto out;
3000 3018 }
3001 3019
3002 3020 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3003 3021 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
3004 3022 MAXPATHLEN + 1);
3005 3023
3006 3024 if (name == NULL) {
3007 3025 *cs->statusp = resp->status = NFS4ERR_INVAL;
3008 3026 kmem_free(nm, len);
3009 3027 goto out;
3010 3028 }
3011 3029
3012 3030 *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
3013 3031
3014 3032 if (name != nm)
3015 3033 kmem_free(name, MAXPATHLEN + 1);
3016 3034 kmem_free(nm, len);
3017 3035
3018 3036 out:
3019 3037 DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
3020 3038 LOOKUP4res *, resp);
3021 3039 }
3022 3040
3023 3041 /* ARGSUSED */
3024 3042 static void
3025 3043 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3026 3044 struct compound_state *cs)
3027 3045 {
3028 3046 LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
3029 3047
3030 3048 DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
3031 3049
3032 3050 if (cs->vp == NULL) {
3033 3051 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3034 3052 goto out;
3035 3053 }
3036 3054
3037 3055 if (cs->vp->v_type != VDIR) {
3038 3056 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
3039 3057 goto out;
3040 3058 }
3041 3059
3042 3060 *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
3043 3061
3044 3062 /*
3045 3063 * From NFSV4 Specification, LOOKUPP should not check for
3046 3064 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
3047 3065 */
3048 3066 if (resp->status == NFS4ERR_WRONGSEC) {
3049 3067 *cs->statusp = resp->status = NFS4_OK;
3050 3068 }
3051 3069
3052 3070 out:
3053 3071 DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
3054 3072 LOOKUPP4res *, resp);
3055 3073 }
3056 3074
3057 3075
3058 3076 /*ARGSUSED2*/
3059 3077 static void
3060 3078 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3061 3079 struct compound_state *cs)
3062 3080 {
3063 3081 OPENATTR4args *args = &argop->nfs_argop4_u.opopenattr;
3064 3082 OPENATTR4res *resp = &resop->nfs_resop4_u.opopenattr;
3065 3083 vnode_t *avp = NULL;
3066 3084 int lookup_flags = LOOKUP_XATTR, error;
3067 3085 int exp_ro = 0;
3068 3086
3069 3087 DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3070 3088 OPENATTR4args *, args);
3071 3089
3072 3090 if (cs->vp == NULL) {
3073 3091 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3074 3092 goto out;
3075 3093 }
3076 3094
3077 3095 if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3078 3096 !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3079 3097 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3080 3098 goto out;
3081 3099 }
3082 3100
3083 3101 /*
3084 3102 * If file system supports passing ACE mask to VOP_ACCESS then
3085 3103 * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3086 3104 */
3087 3105
3088 3106 if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3089 3107 error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3090 3108 V_ACE_MASK, cs->cr, NULL);
3091 3109 else
3092 3110 error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3093 3111 (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3094 3112 (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3095 3113
3096 3114 if (error) {
3097 3115 *cs->statusp = resp->status = puterrno4(EACCES);
3098 3116 goto out;
3099 3117 }
3100 3118
3101 3119 /*
3102 3120 * The CREATE_XATTR_DIR VOP flag cannot be specified if
3103 3121 * the file system is exported read-only -- regardless of
3104 3122 * createdir flag. Otherwise the attrdir would be created
3105 3123 * (assuming server fs isn't mounted readonly locally). If
3106 3124 * VOP_LOOKUP returns ENOENT in this case, the error will
3107 3125 * be translated into EROFS. ENOSYS is mapped to ENOTSUP
3108 3126 * because specfs has no VOP_LOOKUP op, so the macro would
3109 3127 * return ENOSYS. EINVAL is returned by all (current)
3110 3128 * Solaris file system implementations when any of their
3111 3129 * restrictions are violated (xattr(dir) can't have xattrdir).
3112 3130 * Returning NOTSUPP is more appropriate in this case
3113 3131 * because the object will never be able to have an attrdir.
3114 3132 */
3115 3133 if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3116 3134 lookup_flags |= CREATE_XATTR_DIR;
3117 3135
3118 3136 error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3119 3137 NULL, NULL, NULL);
3120 3138
3121 3139 if (error) {
3122 3140 if (error == ENOENT && args->createdir && exp_ro)
3123 3141 *cs->statusp = resp->status = puterrno4(EROFS);
3124 3142 else if (error == EINVAL || error == ENOSYS)
3125 3143 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3126 3144 else
3127 3145 *cs->statusp = resp->status = puterrno4(error);
3128 3146 goto out;
3129 3147 }
3130 3148
3131 3149 ASSERT(avp->v_flag & V_XATTRDIR);
3132 3150
3133 3151 error = makefh4(&cs->fh, avp, cs->exi);
3134 3152
3135 3153 if (error) {
3136 3154 VN_RELE(avp);
3137 3155 *cs->statusp = resp->status = puterrno4(error);
3138 3156 goto out;
3139 3157 }
3140 3158
3141 3159 VN_RELE(cs->vp);
3142 3160 cs->vp = avp;
3143 3161
3144 3162 /*
3145 3163 * There is no requirement for an attrdir fh flag
3146 3164 * because the attrdir has a vnode flag to distinguish
3147 3165 * it from regular (non-xattr) directories. The
3148 3166 * FH4_ATTRDIR flag is set for future sanity checks.
3149 3167 */
3150 3168 set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3151 3169 *cs->statusp = resp->status = NFS4_OK;
3152 3170
3153 3171 out:
3154 3172 DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3155 3173 OPENATTR4res *, resp);
3156 3174 }
3157 3175
3158 3176 static int
3159 3177 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3160 3178 caller_context_t *ct)
3161 3179 {
3162 3180 int error;
3163 3181 int i;
3164 3182 clock_t delaytime;
3165 3183
3166 3184 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3167 3185
3168 3186 /*
3169 3187 * Don't block on mandatory locks. If this routine returns
3170 3188 * EAGAIN, the caller should return NFS4ERR_LOCKED.
3171 3189 */
3172 3190 uio->uio_fmode = FNONBLOCK;
3173 3191
3174 3192 for (i = 0; i < rfs4_maxlock_tries; i++) {
3175 3193
3176 3194
3177 3195 if (direction == FREAD) {
3178 3196 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3179 3197 error = VOP_READ(vp, uio, ioflag, cred, ct);
3180 3198 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3181 3199 } else {
3182 3200 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3183 3201 error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3184 3202 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3185 3203 }
3186 3204
3187 3205 if (error != EAGAIN)
3188 3206 break;
3189 3207
3190 3208 if (i < rfs4_maxlock_tries - 1) {
3191 3209 delay(delaytime);
3192 3210 delaytime *= 2;
3193 3211 }
3194 3212 }
3195 3213
3196 3214 return (error);
3197 3215 }
3198 3216
3199 3217 /* ARGSUSED */
3200 3218 static void
3201 3219 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3202 3220 struct compound_state *cs)
3203 3221 {
3204 3222 READ4args *args = &argop->nfs_argop4_u.opread;
3205 3223 READ4res *resp = &resop->nfs_resop4_u.opread;
3206 3224 int error;
3207 3225 int verror;
3208 3226 vnode_t *vp;
3209 3227 struct vattr va;
3210 3228 struct iovec iov, *iovp = NULL;
3211 3229 int iovcnt;
3212 3230 struct uio uio;
3213 3231 u_offset_t offset;
3214 3232 bool_t *deleg = &cs->deleg;
3215 3233 nfsstat4 stat;
3216 3234 int in_crit = 0;
3217 3235 mblk_t *mp = NULL;
3218 3236 int alloc_err = 0;
3219 3237 int rdma_used = 0;
3220 3238 int loaned_buffers;
3221 3239 caller_context_t ct;
3222 3240 struct uio *uiop;
3223 3241
3224 3242 DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3225 3243 READ4args, args);
3226 3244
3227 3245 vp = cs->vp;
3228 3246 if (vp == NULL) {
3229 3247 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3230 3248 goto out;
3231 3249 }
3232 3250 if (cs->access == CS_ACCESS_DENIED) {
3233 3251 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3234 3252 goto out;
3235 3253 }
3236 3254
3237 3255 if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3238 3256 deleg, TRUE, &ct)) != NFS4_OK) {
3239 3257 *cs->statusp = resp->status = stat;
3240 3258 goto out;
3241 3259 }
3242 3260
3243 3261 /*
3244 3262 * Enter the critical region before calling VOP_RWLOCK
3245 3263 * to avoid a deadlock with write requests.
3246 3264 */
3247 3265 if (nbl_need_check(vp)) {
3248 3266 nbl_start_crit(vp, RW_READER);
3249 3267 in_crit = 1;
3250 3268 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3251 3269 &ct)) {
3252 3270 *cs->statusp = resp->status = NFS4ERR_LOCKED;
3253 3271 goto out;
3254 3272 }
3255 3273 }
3256 3274
3257 3275 if (args->wlist) {
3258 3276 if (args->count > clist_len(args->wlist)) {
3259 3277 *cs->statusp = resp->status = NFS4ERR_INVAL;
3260 3278 goto out;
3261 3279 }
3262 3280 rdma_used = 1;
3263 3281 }
3264 3282
3265 3283 /* use loaned buffers for TCP */
3266 3284 loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3267 3285
3268 3286 va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3269 3287 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3270 3288
3271 3289 /*
3272 3290 * If we can't get the attributes, then we can't do the
3273 3291 * right access checking. So, we'll fail the request.
3274 3292 */
3275 3293 if (verror) {
3276 3294 *cs->statusp = resp->status = puterrno4(verror);
3277 3295 goto out;
3278 3296 }
3279 3297
3280 3298 if (vp->v_type != VREG) {
3281 3299 *cs->statusp = resp->status =
3282 3300 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3283 3301 goto out;
3284 3302 }
3285 3303
3286 3304 if (crgetuid(cs->cr) != va.va_uid &&
3287 3305 (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3288 3306 (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3289 3307 *cs->statusp = resp->status = puterrno4(error);
3290 3308 goto out;
3291 3309 }
3292 3310
3293 3311 if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3294 3312 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3295 3313 goto out;
3296 3314 }
3297 3315
3298 3316 offset = args->offset;
3299 3317 if (offset >= va.va_size) {
3300 3318 *cs->statusp = resp->status = NFS4_OK;
3301 3319 resp->eof = TRUE;
3302 3320 resp->data_len = 0;
3303 3321 resp->data_val = NULL;
3304 3322 resp->mblk = NULL;
3305 3323 /* RDMA */
3306 3324 resp->wlist = args->wlist;
3307 3325 resp->wlist_len = resp->data_len;
3308 3326 *cs->statusp = resp->status = NFS4_OK;
3309 3327 if (resp->wlist)
3310 3328 clist_zero_len(resp->wlist);
3311 3329 goto out;
3312 3330 }
3313 3331
3314 3332 if (args->count == 0) {
3315 3333 *cs->statusp = resp->status = NFS4_OK;
3316 3334 resp->eof = FALSE;
3317 3335 resp->data_len = 0;
3318 3336 resp->data_val = NULL;
3319 3337 resp->mblk = NULL;
3320 3338 /* RDMA */
3321 3339 resp->wlist = args->wlist;
3322 3340 resp->wlist_len = resp->data_len;
3323 3341 if (resp->wlist)
3324 3342 clist_zero_len(resp->wlist);
3325 3343 goto out;
3326 3344 }
3327 3345
3328 3346 /*
3329 3347 * Do not allocate memory more than maximum allowed
3330 3348 * transfer size
3331 3349 */
3332 3350 if (args->count > rfs4_tsize(req))
3333 3351 args->count = rfs4_tsize(req);
3334 3352
3335 3353 if (loaned_buffers) {
3336 3354 uiop = (uio_t *)rfs_setup_xuio(vp);
3337 3355 ASSERT(uiop != NULL);
3338 3356 uiop->uio_segflg = UIO_SYSSPACE;
3339 3357 uiop->uio_loffset = args->offset;
3340 3358 uiop->uio_resid = args->count;
3341 3359
3342 3360 /* Jump to do the read if successful */
3343 3361 if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3344 3362 /*
3345 3363 * Need to hold the vnode until after VOP_RETZCBUF()
3346 3364 * is called.
3347 3365 */
3348 3366 VN_HOLD(vp);
3349 3367 goto doio_read;
3350 3368 }
3351 3369
3352 3370 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3353 3371 uiop->uio_loffset, int, uiop->uio_resid);
3354 3372
3355 3373 uiop->uio_extflg = 0;
3356 3374
3357 3375 /* failure to setup for zero copy */
3358 3376 rfs_free_xuio((void *)uiop);
3359 3377 loaned_buffers = 0;
3360 3378 }
3361 3379
3362 3380 /*
3363 3381 * If returning data via RDMA Write, then grab the chunk list. If we
3364 3382 * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3365 3383 */
3366 3384 if (rdma_used) {
3367 3385 mp = NULL;
3368 3386 (void) rdma_get_wchunk(req, &iov, args->wlist);
3369 3387 uio.uio_iov = &iov;
3370 3388 uio.uio_iovcnt = 1;
3371 3389 } else {
3372 3390 /*
3373 3391 * mp will contain the data to be sent out in the read reply.
3374 3392 * It will be freed after the reply has been sent.
3375 3393 */
3376 3394 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
3377 3395 ASSERT(mp != NULL);
3378 3396 ASSERT(alloc_err == 0);
3379 3397 uio.uio_iov = iovp;
3380 3398 uio.uio_iovcnt = iovcnt;
3381 3399 }
3382 3400
3383 3401 uio.uio_segflg = UIO_SYSSPACE;
3384 3402 uio.uio_extflg = UIO_COPY_CACHED;
3385 3403 uio.uio_loffset = args->offset;
3386 3404 uio.uio_resid = args->count;
3387 3405 uiop = &uio;
3388 3406
3389 3407 doio_read:
3390 3408 error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3391 3409
3392 3410 va.va_mask = AT_SIZE;
3393 3411 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3394 3412
3395 3413 if (error) {
3396 3414 if (mp)
3397 3415 freemsg(mp);
3398 3416 *cs->statusp = resp->status = puterrno4(error);
3399 3417 goto out;
3400 3418 }
3401 3419
3402 3420 /* make mblk using zc buffers */
3403 3421 if (loaned_buffers) {
3404 3422 mp = uio_to_mblk(uiop);
3405 3423 ASSERT(mp != NULL);
3406 3424 }
3407 3425
3408 3426 *cs->statusp = resp->status = NFS4_OK;
3409 3427
3410 3428 ASSERT(uiop->uio_resid >= 0);
3411 3429 resp->data_len = args->count - uiop->uio_resid;
3412 3430 if (mp) {
3413 3431 resp->data_val = (char *)mp->b_datap->db_base;
3414 3432 rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3415 3433 } else {
3416 3434 resp->data_val = (caddr_t)iov.iov_base;
3417 3435 }
3418 3436
3419 3437 resp->mblk = mp;
3420 3438
3421 3439 if (!verror && offset + resp->data_len == va.va_size)
3422 3440 resp->eof = TRUE;
3423 3441 else
3424 3442 resp->eof = FALSE;
3425 3443
3426 3444 if (rdma_used) {
3427 3445 if (!rdma_setup_read_data4(args, resp)) {
3428 3446 *cs->statusp = resp->status = NFS4ERR_INVAL;
3429 3447 }
3430 3448 } else {
3431 3449 resp->wlist = NULL;
3432 3450 }
3433 3451
3434 3452 out:
3435 3453 if (in_crit)
3436 3454 nbl_end_crit(vp);
3437 3455
3438 3456 if (iovp != NULL)
3439 3457 kmem_free(iovp, iovcnt * sizeof (struct iovec));
3440 3458
3441 3459 DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3442 3460 READ4res *, resp);
3443 3461 }
3444 3462
3445 3463 static void
3446 3464 rfs4_op_read_free(nfs_resop4 *resop)
3447 3465 {
3448 3466 READ4res *resp = &resop->nfs_resop4_u.opread;
3449 3467
3450 3468 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3451 3469 freemsg(resp->mblk);
3452 3470 resp->mblk = NULL;
3453 3471 resp->data_val = NULL;
3454 3472 resp->data_len = 0;
3455 3473 }
3456 3474 }
3457 3475
3458 3476 static void
3459 3477 rfs4_op_readdir_free(nfs_resop4 * resop)
3460 3478 {
3461 3479 READDIR4res *resp = &resop->nfs_resop4_u.opreaddir;
3462 3480
3463 3481 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3464 3482 freeb(resp->mblk);
3465 3483 resp->mblk = NULL;
3466 3484 resp->data_len = 0;
3467 3485 }
3468 3486 }
3469 3487
3470 3488
3471 3489 /* ARGSUSED */
3472 3490 static void
3473 3491 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3474 3492 struct compound_state *cs)
3475 3493 {
3476 3494 PUTPUBFH4res *resp = &resop->nfs_resop4_u.opputpubfh;
3477 3495 int error;
3478 3496 vnode_t *vp;
3479 3497 struct exportinfo *exi, *sav_exi;
3480 3498 nfs_fh4_fmt_t *fh_fmtp;
3481 3499 nfs_export_t *ne = nfs_get_export();
3482 3500
3483 3501 DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3484 3502
3485 3503 if (cs->vp) {
3486 3504 VN_RELE(cs->vp);
3487 3505 cs->vp = NULL;
3488 3506 }
3489 3507
3490 3508 if (cs->cr)
3491 3509 crfree(cs->cr);
3492 3510
3493 3511 cs->cr = crdup(cs->basecr);
3494 3512
3495 3513 vp = ne->exi_public->exi_vp;
3496 3514 if (vp == NULL) {
3497 3515 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3498 3516 goto out;
3499 3517 }
3500 3518
3501 3519 error = makefh4(&cs->fh, vp, ne->exi_public);
3502 3520 if (error != 0) {
3503 3521 *cs->statusp = resp->status = puterrno4(error);
3504 3522 goto out;
3505 3523 }
3506 3524 sav_exi = cs->exi;
3507 3525 if (ne->exi_public == ne->exi_root) {
3508 3526 /*
3509 3527 * No filesystem is actually shared public, so we default
3510 3528 * to exi_root. In this case, we must check whether root
3511 3529 * is exported.
3512 3530 */
3513 3531 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3514 3532
3515 3533 /*
3516 3534 * if root filesystem is exported, the exportinfo struct that we
3517 3535 * should use is what checkexport4 returns, because root_exi is
3518 3536 * actually a mostly empty struct.
3519 3537 */
3520 3538 exi = checkexport4(&fh_fmtp->fh4_fsid,
3521 3539 (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3522 3540 cs->exi = ((exi != NULL) ? exi : ne->exi_public);
3523 3541 } else {
3524 3542 /*
3525 3543 * it's a properly shared filesystem
3526 3544 */
3527 3545 cs->exi = ne->exi_public;
3528 3546 }
3529 3547
3530 3548 if (is_system_labeled()) {
3531 3549 bslabel_t *clabel;
3532 3550
3533 3551 ASSERT(req->rq_label != NULL);
3534 3552 clabel = req->rq_label;
3535 3553 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3536 3554 "got client label from request(1)",
3537 3555 struct svc_req *, req);
3538 3556 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3539 3557 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3540 3558 cs->exi)) {
3541 3559 *cs->statusp = resp->status =
3542 3560 NFS4ERR_SERVERFAULT;
3543 3561 goto out;
3544 3562 }
3545 3563 }
3546 3564 }
3547 3565
3548 3566 VN_HOLD(vp);
3549 3567 cs->vp = vp;
3550 3568
3551 3569 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3552 3570 VN_RELE(cs->vp);
3553 3571 cs->vp = NULL;
3554 3572 cs->exi = sav_exi;
3555 3573 goto out;
3556 3574 }
3557 3575
3558 3576 *cs->statusp = resp->status = NFS4_OK;
3559 3577 out:
3560 3578 DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3561 3579 PUTPUBFH4res *, resp);
3562 3580 }
3563 3581
3564 3582 /*
3565 3583 * XXX - issue with put*fh operations. Suppose /export/home is exported.
3566 3584 * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3567 3585 * or joe have restrictive search permissions, then we shouldn't let
3568 3586 * the client get a file handle. This is easy to enforce. However, we
3569 3587 * don't know what security flavor should be used until we resolve the
3570 3588 * path name. Another complication is uid mapping. If root is
3571 3589 * the user, then it will be mapped to the anonymous user by default,
3572 3590 * but we won't know that till we've resolved the path name. And we won't
3573 3591 * know what the anonymous user is.
3574 3592 * Luckily, SECINFO is specified to take a full filename.
3575 3593 * So what we will have to in rfs4_op_lookup is check that flavor of
3576 3594 * the target object matches that of the request, and if root was the
3577 3595 * caller, check for the root= and anon= options, and if necessary,
3578 3596 * repeat the lookup using the right cred_t. But that's not done yet.
3579 3597 */
3580 3598 /* ARGSUSED */
3581 3599 static void
3582 3600 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3583 3601 struct compound_state *cs)
3584 3602 {
3585 3603 PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3586 3604 PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3587 3605 nfs_fh4_fmt_t *fh_fmtp;
3588 3606
3589 3607 DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3590 3608 PUTFH4args *, args);
3591 3609
3592 3610 if (cs->vp) {
3593 3611 VN_RELE(cs->vp);
3594 3612 cs->vp = NULL;
3595 3613 }
3596 3614
3597 3615 if (cs->cr) {
3598 3616 crfree(cs->cr);
3599 3617 cs->cr = NULL;
3600 3618 }
3601 3619
3602 3620
3603 3621 if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3604 3622 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3605 3623 goto out;
3606 3624 }
3607 3625
3608 3626 fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3609 3627 cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3610 3628 NULL);
3611 3629
3612 3630 if (cs->exi == NULL) {
3613 3631 *cs->statusp = resp->status = NFS4ERR_STALE;
3614 3632 goto out;
3615 3633 }
3616 3634
3617 3635 cs->cr = crdup(cs->basecr);
3618 3636
3619 3637 ASSERT(cs->cr != NULL);
3620 3638
3621 3639 if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3622 3640 *cs->statusp = resp->status;
3623 3641 goto out;
3624 3642 }
3625 3643
3626 3644 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3627 3645 VN_RELE(cs->vp);
3628 3646 cs->vp = NULL;
3629 3647 goto out;
3630 3648 }
3631 3649
3632 3650 nfs_fh4_copy(&args->object, &cs->fh);
3633 3651 *cs->statusp = resp->status = NFS4_OK;
3634 3652 cs->deleg = FALSE;
3635 3653
3636 3654 out:
3637 3655 DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3638 3656 PUTFH4res *, resp);
3639 3657 }
3640 3658
3641 3659 /* ARGSUSED */
3642 3660 static void
3643 3661 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3644 3662 struct compound_state *cs)
3645 3663 {
3646 3664 PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3647 3665 int error;
3648 3666 fid_t fid;
3649 3667 struct exportinfo *exi, *sav_exi;
3650 3668
3651 3669 DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3652 3670
3653 3671 if (cs->vp) {
3654 3672 VN_RELE(cs->vp);
3655 3673 cs->vp = NULL;
3656 3674 }
3657 3675
3658 3676 if (cs->cr)
3659 3677 crfree(cs->cr);
3660 3678
3661 3679 cs->cr = crdup(cs->basecr);
3662 3680
3663 3681 /*
3664 3682 * Using rootdir, the system root vnode,
3665 3683 * get its fid.
3666 3684 */
3667 3685 bzero(&fid, sizeof (fid));
3668 3686 fid.fid_len = MAXFIDSZ;
3669 3687 error = vop_fid_pseudo(ZONE_ROOTVP(), &fid);
3670 3688 if (error != 0) {
3671 3689 *cs->statusp = resp->status = puterrno4(error);
3672 3690 goto out;
3673 3691 }
3674 3692
3675 3693 /*
3676 3694 * Then use the root fsid & fid it to find out if it's exported
3677 3695 *
3678 3696 * If the server root isn't exported directly, then
3679 3697 * it should at least be a pseudo export based on
3680 3698 * one or more exports further down in the server's
3681 3699 * file tree.
3682 3700 */
3683 3701 exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL);
3684 3702 if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3685 3703 NFS4_DEBUG(rfs4_debug,
3686 3704 (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3687 3705 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3688 3706 goto out;
3689 3707 }
3690 3708
3691 3709 /*
3692 3710 * Now make a filehandle based on the root
3693 3711 * export and root vnode.
3694 3712 */
3695 3713 error = makefh4(&cs->fh, ZONE_ROOTVP(), exi);
3696 3714 if (error != 0) {
3697 3715 *cs->statusp = resp->status = puterrno4(error);
3698 3716 goto out;
3699 3717 }
3700 3718
3701 3719 sav_exi = cs->exi;
3702 3720 cs->exi = exi;
3703 3721
3704 3722 VN_HOLD(ZONE_ROOTVP());
3705 3723 cs->vp = ZONE_ROOTVP();
3706 3724
3707 3725 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3708 3726 VN_RELE(cs->vp);
3709 3727 cs->vp = NULL;
3710 3728 cs->exi = sav_exi;
3711 3729 goto out;
3712 3730 }
3713 3731
3714 3732 *cs->statusp = resp->status = NFS4_OK;
3715 3733 cs->deleg = FALSE;
3716 3734 out:
3717 3735 DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3718 3736 PUTROOTFH4res *, resp);
3719 3737 }
3720 3738
3721 3739 /*
3722 3740 * readlink: args: CURRENT_FH.
3723 3741 * res: status. If success - CURRENT_FH unchanged, return linktext.
3724 3742 */
3725 3743
3726 3744 /* ARGSUSED */
3727 3745 static void
3728 3746 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3729 3747 struct compound_state *cs)
3730 3748 {
3731 3749 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3732 3750 int error;
3733 3751 vnode_t *vp;
3734 3752 struct iovec iov;
3735 3753 struct vattr va;
3736 3754 struct uio uio;
3737 3755 char *data;
3738 3756 struct sockaddr *ca;
3739 3757 char *name = NULL;
3740 3758 int is_referral;
3741 3759
3742 3760 DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3743 3761
3744 3762 /* CURRENT_FH: directory */
3745 3763 vp = cs->vp;
3746 3764 if (vp == NULL) {
3747 3765 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3748 3766 goto out;
3749 3767 }
3750 3768
3751 3769 if (cs->access == CS_ACCESS_DENIED) {
3752 3770 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3753 3771 goto out;
3754 3772 }
3755 3773
3756 3774 /* Is it a referral? */
3757 3775 if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3758 3776
3759 3777 is_referral = 1;
3760 3778
3761 3779 } else {
3762 3780
3763 3781 is_referral = 0;
3764 3782
3765 3783 if (vp->v_type == VDIR) {
3766 3784 *cs->statusp = resp->status = NFS4ERR_ISDIR;
3767 3785 goto out;
3768 3786 }
3769 3787
3770 3788 if (vp->v_type != VLNK) {
3771 3789 *cs->statusp = resp->status = NFS4ERR_INVAL;
3772 3790 goto out;
3773 3791 }
3774 3792
3775 3793 }
3776 3794
3777 3795 va.va_mask = AT_MODE;
3778 3796 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3779 3797 if (error) {
3780 3798 *cs->statusp = resp->status = puterrno4(error);
3781 3799 goto out;
3782 3800 }
3783 3801
3784 3802 if (MANDLOCK(vp, va.va_mode)) {
3785 3803 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3786 3804 goto out;
3787 3805 }
3788 3806
3789 3807 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3790 3808
3791 3809 if (is_referral) {
3792 3810 char *s;
3793 3811 size_t strsz;
3794 3812
3795 3813 /* Get an artificial symlink based on a referral */
3796 3814 s = build_symlink(vp, cs->cr, &strsz);
3797 3815 global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3798 3816 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3799 3817 vnode_t *, vp, char *, s);
3800 3818 if (s == NULL)
3801 3819 error = EINVAL;
3802 3820 else {
3803 3821 error = 0;
3804 3822 (void) strlcpy(data, s, MAXPATHLEN + 1);
3805 3823 kmem_free(s, strsz);
3806 3824 }
3807 3825
3808 3826 } else {
3809 3827
3810 3828 iov.iov_base = data;
3811 3829 iov.iov_len = MAXPATHLEN;
3812 3830 uio.uio_iov = &iov;
3813 3831 uio.uio_iovcnt = 1;
3814 3832 uio.uio_segflg = UIO_SYSSPACE;
3815 3833 uio.uio_extflg = UIO_COPY_CACHED;
3816 3834 uio.uio_loffset = 0;
3817 3835 uio.uio_resid = MAXPATHLEN;
3818 3836
3819 3837 error = VOP_READLINK(vp, &uio, cs->cr, NULL);
3820 3838
3821 3839 if (!error)
3822 3840 *(data + MAXPATHLEN - uio.uio_resid) = '\0';
3823 3841 }
3824 3842
3825 3843 if (error) {
3826 3844 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3827 3845 *cs->statusp = resp->status = puterrno4(error);
3828 3846 goto out;
3829 3847 }
3830 3848
3831 3849 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3832 3850 name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3833 3851 MAXPATHLEN + 1);
3834 3852
3835 3853 if (name == NULL) {
3836 3854 /*
3837 3855 * Even though the conversion failed, we return
3838 3856 * something. We just don't translate it.
3839 3857 */
3840 3858 name = data;
3841 3859 }
3842 3860
3843 3861 /*
3844 3862 * treat link name as data
3845 3863 */
3846 3864 (void) str_to_utf8(name, (utf8string *)&resp->link);
3847 3865
3848 3866 if (name != data)
3849 3867 kmem_free(name, MAXPATHLEN + 1);
3850 3868 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3851 3869 *cs->statusp = resp->status = NFS4_OK;
3852 3870
3853 3871 out:
3854 3872 DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3855 3873 READLINK4res *, resp);
3856 3874 }
3857 3875
3858 3876 static void
3859 3877 rfs4_op_readlink_free(nfs_resop4 *resop)
3860 3878 {
3861 3879 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3862 3880 utf8string *symlink = (utf8string *)&resp->link;
3863 3881
3864 3882 if (symlink->utf8string_val) {
3865 3883 UTF8STRING_FREE(*symlink)
3866 3884 }
3867 3885 }
3868 3886
3869 3887 /*
3870 3888 * release_lockowner:
3871 3889 * Release any state associated with the supplied
3872 3890 * lockowner. Note if any lo_state is holding locks we will not
3873 3891 * rele that lo_state and thus the lockowner will not be destroyed.
3874 3892 * A client using lock after the lock owner stateid has been released
3875 3893 * will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3876 3894 * to reissue the lock with new_lock_owner set to TRUE.
3877 3895 * args: lock_owner
3878 3896 * res: status
3879 3897 */
3880 3898 /* ARGSUSED */
3881 3899 static void
3882 3900 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3883 3901 struct svc_req *req, struct compound_state *cs)
3884 3902 {
3885 3903 RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3886 3904 RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3887 3905 rfs4_lockowner_t *lo;
3888 3906 rfs4_openowner_t *oo;
3889 3907 rfs4_state_t *sp;
3890 3908 rfs4_lo_state_t *lsp;
3891 3909 rfs4_client_t *cp;
3892 3910 bool_t create = FALSE;
3893 3911 locklist_t *llist;
3894 3912 sysid_t sysid;
3895 3913
3896 3914 DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3897 3915 cs, RELEASE_LOCKOWNER4args *, ap);
3898 3916
3899 3917 /* Make sure there is a clientid around for this request */
3900 3918 cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3901 3919
3902 3920 if (cp == NULL) {
3903 3921 *cs->statusp = resp->status =
3904 3922 rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3905 3923 goto out;
3906 3924 }
3907 3925 rfs4_client_rele(cp);
3908 3926
3909 3927 lo = rfs4_findlockowner(&ap->lock_owner, &create);
3910 3928 if (lo == NULL) {
3911 3929 *cs->statusp = resp->status = NFS4_OK;
3912 3930 goto out;
3913 3931 }
3914 3932 ASSERT(lo->rl_client != NULL);
3915 3933
3916 3934 /*
3917 3935 * Check for EXPIRED client. If so will reap state with in a lease
3918 3936 * period or on next set_clientid_confirm step
3919 3937 */
3920 3938 if (rfs4_lease_expired(lo->rl_client)) {
3921 3939 rfs4_lockowner_rele(lo);
3922 3940 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
3923 3941 goto out;
3924 3942 }
3925 3943
3926 3944 /*
3927 3945 * If no sysid has been assigned, then no locks exist; just return.
3928 3946 */
3929 3947 rfs4_dbe_lock(lo->rl_client->rc_dbe);
3930 3948 if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
3931 3949 rfs4_lockowner_rele(lo);
3932 3950 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3933 3951 goto out;
3934 3952 }
3935 3953
3936 3954 sysid = lo->rl_client->rc_sysidt;
3937 3955 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3938 3956
3939 3957 /*
3940 3958 * Mark the lockowner invalid.
3941 3959 */
3942 3960 rfs4_dbe_hide(lo->rl_dbe);
3943 3961
3944 3962 /*
3945 3963 * sysid-pid pair should now not be used since the lockowner is
3946 3964 * invalid. If the client were to instantiate the lockowner again
3947 3965 * it would be assigned a new pid. Thus we can get the list of
3948 3966 * current locks.
3949 3967 */
3950 3968
3951 3969 llist = flk_get_active_locks(sysid, lo->rl_pid);
3952 3970 /* If we are still holding locks fail */
3953 3971 if (llist != NULL) {
3954 3972
3955 3973 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3956 3974
3957 3975 flk_free_locklist(llist);
3958 3976 /*
3959 3977 * We need to unhide the lockowner so the client can
3960 3978 * try it again. The bad thing here is if the client
3961 3979 * has a logic error that took it here in the first place
3962 3980 * they probably have lost accounting of the locks that it
3963 3981 * is holding. So we may have dangling state until the
3964 3982 * open owner state is reaped via close. One scenario
3965 3983 * that could possibly occur is that the client has
3966 3984 * sent the unlock request(s) in separate threads
3967 3985 * and has not waited for the replies before sending the
3968 3986 * RELEASE_LOCKOWNER request. Presumably, it would expect
3969 3987 * and deal appropriately with NFS4ERR_LOCKS_HELD, by
3970 3988 * reissuing the request.
3971 3989 */
3972 3990 rfs4_dbe_unhide(lo->rl_dbe);
3973 3991 rfs4_lockowner_rele(lo);
3974 3992 goto out;
3975 3993 }
3976 3994
3977 3995 /*
3978 3996 * For the corresponding client we need to check each open
3979 3997 * owner for any opens that have lockowner state associated
3980 3998 * with this lockowner.
3981 3999 */
3982 4000
3983 4001 rfs4_dbe_lock(lo->rl_client->rc_dbe);
3984 4002 for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
3985 4003 oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
3986 4004
3987 4005 rfs4_dbe_lock(oo->ro_dbe);
3988 4006 for (sp = list_head(&oo->ro_statelist); sp != NULL;
3989 4007 sp = list_next(&oo->ro_statelist, sp)) {
3990 4008
3991 4009 rfs4_dbe_lock(sp->rs_dbe);
3992 4010 for (lsp = list_head(&sp->rs_lostatelist);
3993 4011 lsp != NULL;
3994 4012 lsp = list_next(&sp->rs_lostatelist, lsp)) {
3995 4013 if (lsp->rls_locker == lo) {
3996 4014 rfs4_dbe_lock(lsp->rls_dbe);
3997 4015 rfs4_dbe_invalidate(lsp->rls_dbe);
3998 4016 rfs4_dbe_unlock(lsp->rls_dbe);
3999 4017 }
4000 4018 }
4001 4019 rfs4_dbe_unlock(sp->rs_dbe);
4002 4020 }
4003 4021 rfs4_dbe_unlock(oo->ro_dbe);
4004 4022 }
4005 4023 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4006 4024
4007 4025 rfs4_lockowner_rele(lo);
4008 4026
4009 4027 *cs->statusp = resp->status = NFS4_OK;
4010 4028
4011 4029 out:
4012 4030 DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4013 4031 cs, RELEASE_LOCKOWNER4res *, resp);
4014 4032 }
4015 4033
4016 4034 /*
4017 4035 * short utility function to lookup a file and recall the delegation
4018 4036 */
4019 4037 static rfs4_file_t *
4020 4038 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4021 4039 int *lkup_error, cred_t *cr)
4022 4040 {
4023 4041 vnode_t *vp;
4024 4042 rfs4_file_t *fp = NULL;
4025 4043 bool_t fcreate = FALSE;
4026 4044 int error;
4027 4045
4028 4046 if (vpp)
4029 4047 *vpp = NULL;
4030 4048
4031 4049 if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4032 4050 NULL)) == 0) {
4033 4051 if (vp->v_type == VREG)
4034 4052 fp = rfs4_findfile(vp, NULL, &fcreate);
4035 4053 if (vpp)
4036 4054 *vpp = vp;
4037 4055 else
4038 4056 VN_RELE(vp);
4039 4057 }
4040 4058
4041 4059 if (lkup_error)
4042 4060 *lkup_error = error;
4043 4061
4044 4062 return (fp);
4045 4063 }
4046 4064
4047 4065 /*
4048 4066 * remove: args: CURRENT_FH: directory; name.
4049 4067 * res: status. If success - CURRENT_FH unchanged, return change_info
4050 4068 * for directory.
4051 4069 */
4052 4070 /* ARGSUSED */
4053 4071 static void
4054 4072 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4055 4073 struct compound_state *cs)
4056 4074 {
4057 4075 REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4058 4076 REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4059 4077 int error;
4060 4078 vnode_t *dvp, *vp;
4061 4079 struct vattr bdva, idva, adva;
4062 4080 char *nm;
4063 4081 uint_t len;
4064 4082 rfs4_file_t *fp;
4065 4083 int in_crit = 0;
4066 4084 bslabel_t *clabel;
4067 4085 struct sockaddr *ca;
4068 4086 char *name = NULL;
4069 4087 nfsstat4 status;
4070 4088
4071 4089 DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4072 4090 REMOVE4args *, args);
4073 4091
4074 4092 /* CURRENT_FH: directory */
4075 4093 dvp = cs->vp;
4076 4094 if (dvp == NULL) {
4077 4095 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4078 4096 goto out;
4079 4097 }
4080 4098
4081 4099 if (cs->access == CS_ACCESS_DENIED) {
4082 4100 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4083 4101 goto out;
4084 4102 }
4085 4103
4086 4104 /*
4087 4105 * If there is an unshared filesystem mounted on this vnode,
4088 4106 * Do not allow to remove anything in this directory.
4089 4107 */
4090 4108 if (vn_ismntpt(dvp)) {
4091 4109 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4092 4110 goto out;
4093 4111 }
4094 4112
4095 4113 if (dvp->v_type != VDIR) {
4096 4114 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4097 4115 goto out;
4098 4116 }
4099 4117
4100 4118 status = utf8_dir_verify(&args->target);
4101 4119 if (status != NFS4_OK) {
4102 4120 *cs->statusp = resp->status = status;
4103 4121 goto out;
4104 4122 }
4105 4123
4106 4124 /*
4107 4125 * Lookup the file so that we can check if it's a directory
4108 4126 */
4109 4127 nm = utf8_to_fn(&args->target, &len, NULL);
4110 4128 if (nm == NULL) {
4111 4129 *cs->statusp = resp->status = NFS4ERR_INVAL;
4112 4130 goto out;
4113 4131 }
4114 4132
4115 4133 if (len > MAXNAMELEN) {
4116 4134 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4117 4135 kmem_free(nm, len);
4118 4136 goto out;
4119 4137 }
4120 4138
4121 4139 if (rdonly4(req, cs)) {
4122 4140 *cs->statusp = resp->status = NFS4ERR_ROFS;
4123 4141 kmem_free(nm, len);
4124 4142 goto out;
4125 4143 }
4126 4144
4127 4145 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4128 4146 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4129 4147 MAXPATHLEN + 1);
4130 4148
4131 4149 if (name == NULL) {
4132 4150 *cs->statusp = resp->status = NFS4ERR_INVAL;
4133 4151 kmem_free(nm, len);
4134 4152 goto out;
4135 4153 }
4136 4154
4137 4155 /*
4138 4156 * Lookup the file to determine type and while we are see if
4139 4157 * there is a file struct around and check for delegation.
4140 4158 * We don't need to acquire va_seq before this lookup, if
4141 4159 * it causes an update, cinfo.before will not match, which will
4142 4160 * trigger a cache flush even if atomic is TRUE.
4143 4161 */
4144 4162 if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4145 4163 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4146 4164 NULL)) {
4147 4165 VN_RELE(vp);
4148 4166 rfs4_file_rele(fp);
4149 4167 *cs->statusp = resp->status = NFS4ERR_DELAY;
4150 4168 if (nm != name)
4151 4169 kmem_free(name, MAXPATHLEN + 1);
4152 4170 kmem_free(nm, len);
4153 4171 goto out;
4154 4172 }
4155 4173 }
4156 4174
4157 4175 /* Didn't find anything to remove */
4158 4176 if (vp == NULL) {
4159 4177 *cs->statusp = resp->status = error;
4160 4178 if (nm != name)
4161 4179 kmem_free(name, MAXPATHLEN + 1);
4162 4180 kmem_free(nm, len);
4163 4181 goto out;
4164 4182 }
4165 4183
4166 4184 if (nbl_need_check(vp)) {
4167 4185 nbl_start_crit(vp, RW_READER);
4168 4186 in_crit = 1;
4169 4187 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4170 4188 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4171 4189 if (nm != name)
4172 4190 kmem_free(name, MAXPATHLEN + 1);
4173 4191 kmem_free(nm, len);
4174 4192 nbl_end_crit(vp);
4175 4193 VN_RELE(vp);
4176 4194 if (fp) {
4177 4195 rfs4_clear_dont_grant(fp);
4178 4196 rfs4_file_rele(fp);
4179 4197 }
4180 4198 goto out;
4181 4199 }
4182 4200 }
4183 4201
4184 4202 /* check label before allowing removal */
4185 4203 if (is_system_labeled()) {
4186 4204 ASSERT(req->rq_label != NULL);
4187 4205 clabel = req->rq_label;
4188 4206 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4189 4207 "got client label from request(1)",
4190 4208 struct svc_req *, req);
4191 4209 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4192 4210 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4193 4211 cs->exi)) {
4194 4212 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4195 4213 if (name != nm)
4196 4214 kmem_free(name, MAXPATHLEN + 1);
4197 4215 kmem_free(nm, len);
4198 4216 if (in_crit)
4199 4217 nbl_end_crit(vp);
4200 4218 VN_RELE(vp);
4201 4219 if (fp) {
4202 4220 rfs4_clear_dont_grant(fp);
4203 4221 rfs4_file_rele(fp);
4204 4222 }
4205 4223 goto out;
4206 4224 }
4207 4225 }
4208 4226 }
4209 4227
4210 4228 /* Get dir "before" change value */
4211 4229 bdva.va_mask = AT_CTIME|AT_SEQ;
4212 4230 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4213 4231 if (error) {
4214 4232 *cs->statusp = resp->status = puterrno4(error);
4215 4233 if (nm != name)
4216 4234 kmem_free(name, MAXPATHLEN + 1);
4217 4235 kmem_free(nm, len);
4218 4236 if (in_crit)
4219 4237 nbl_end_crit(vp);
4220 4238 VN_RELE(vp);
4221 4239 if (fp) {
4222 4240 rfs4_clear_dont_grant(fp);
4223 4241 rfs4_file_rele(fp);
4224 4242 }
4225 4243 goto out;
4226 4244 }
4227 4245 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4228 4246
4229 4247 /* Actually do the REMOVE operation */
4230 4248 if (vp->v_type == VDIR) {
4231 4249 /*
4232 4250 * Can't remove a directory that has a mounted-on filesystem.
4233 4251 */
4234 4252 if (vn_ismntpt(vp)) {
4235 4253 error = EACCES;
4236 4254 } else {
4237 4255 /*
4238 4256 * System V defines rmdir to return EEXIST,
4239 4257 * not ENOTEMPTY, if the directory is not
4240 4258 * empty. A System V NFS server needs to map
4241 4259 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4242 4260 * transmit over the wire.
4243 4261 */
4244 4262 if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr,
4245 4263 NULL, 0)) == EEXIST)
4246 4264 error = ENOTEMPTY;
4247 4265 }
4248 4266 } else {
4249 4267 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4250 4268 fp != NULL) {
4251 4269 struct vattr va;
4252 4270 vnode_t *tvp;
4253 4271
4254 4272 rfs4_dbe_lock(fp->rf_dbe);
4255 4273 tvp = fp->rf_vp;
4256 4274 if (tvp)
4257 4275 VN_HOLD(tvp);
4258 4276 rfs4_dbe_unlock(fp->rf_dbe);
4259 4277
4260 4278 if (tvp) {
4261 4279 /*
4262 4280 * This is va_seq safe because we are not
4263 4281 * manipulating dvp.
4264 4282 */
4265 4283 va.va_mask = AT_NLINK;
4266 4284 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4267 4285 va.va_nlink == 0) {
4268 4286 /* Remove state on file remove */
4269 4287 if (in_crit) {
4270 4288 nbl_end_crit(vp);
4271 4289 in_crit = 0;
4272 4290 }
4273 4291 rfs4_close_all_state(fp);
4274 4292 }
4275 4293 VN_RELE(tvp);
4276 4294 }
4277 4295 }
4278 4296 }
4279 4297
4280 4298 if (in_crit)
4281 4299 nbl_end_crit(vp);
4282 4300 VN_RELE(vp);
4283 4301
4284 4302 if (fp) {
4285 4303 rfs4_clear_dont_grant(fp);
4286 4304 rfs4_file_rele(fp);
4287 4305 }
4288 4306 if (nm != name)
4289 4307 kmem_free(name, MAXPATHLEN + 1);
4290 4308 kmem_free(nm, len);
4291 4309
4292 4310 if (error) {
4293 4311 *cs->statusp = resp->status = puterrno4(error);
4294 4312 goto out;
4295 4313 }
4296 4314
4297 4315 /*
4298 4316 * Get the initial "after" sequence number, if it fails, set to zero
4299 4317 */
4300 4318 idva.va_mask = AT_SEQ;
4301 4319 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4302 4320 idva.va_seq = 0;
4303 4321
4304 4322 /*
4305 4323 * Force modified data and metadata out to stable storage.
4306 4324 */
4307 4325 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4308 4326
4309 4327 /*
4310 4328 * Get "after" change value, if it fails, simply return the
4311 4329 * before value.
4312 4330 */
4313 4331 adva.va_mask = AT_CTIME|AT_SEQ;
4314 4332 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4315 4333 adva.va_ctime = bdva.va_ctime;
4316 4334 adva.va_seq = 0;
4317 4335 }
4318 4336
4319 4337 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4320 4338
4321 4339 /*
4322 4340 * The cinfo.atomic = TRUE only if we have
4323 4341 * non-zero va_seq's, and it has incremented by exactly one
4324 4342 * during the VOP_REMOVE/RMDIR and it didn't change during
4325 4343 * the VOP_FSYNC.
4326 4344 */
4327 4345 if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4328 4346 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4329 4347 resp->cinfo.atomic = TRUE;
4330 4348 else
4331 4349 resp->cinfo.atomic = FALSE;
4332 4350
4333 4351 *cs->statusp = resp->status = NFS4_OK;
4334 4352
4335 4353 out:
4336 4354 DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4337 4355 REMOVE4res *, resp);
4338 4356 }
4339 4357
4340 4358 /*
4341 4359 * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4342 4360 * oldname and newname.
4343 4361 * res: status. If success - CURRENT_FH unchanged, return change_info
4344 4362 * for both from and target directories.
4345 4363 */
4346 4364 /* ARGSUSED */
4347 4365 static void
4348 4366 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4349 4367 struct compound_state *cs)
4350 4368 {
4351 4369 RENAME4args *args = &argop->nfs_argop4_u.oprename;
4352 4370 RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4353 4371 int error;
4354 4372 vnode_t *odvp;
4355 4373 vnode_t *ndvp;
4356 4374 vnode_t *srcvp, *targvp, *tvp;
4357 4375 struct vattr obdva, oidva, oadva;
4358 4376 struct vattr nbdva, nidva, nadva;
4359 4377 char *onm, *nnm;
4360 4378 uint_t olen, nlen;
4361 4379 rfs4_file_t *fp, *sfp;
4362 4380 int in_crit_src, in_crit_targ;
4363 4381 int fp_rele_grant_hold, sfp_rele_grant_hold;
4364 4382 int unlinked;
4365 4383 bslabel_t *clabel;
4366 4384 struct sockaddr *ca;
4367 4385 char *converted_onm = NULL;
4368 4386 char *converted_nnm = NULL;
4369 4387 nfsstat4 status;
4370 4388
4371 4389 DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4372 4390 RENAME4args *, args);
4373 4391
4374 4392 fp = sfp = NULL;
4375 4393 srcvp = targvp = tvp = NULL;
4376 4394 in_crit_src = in_crit_targ = 0;
4377 4395 fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4378 4396 unlinked = 0;
4379 4397
4380 4398 /* CURRENT_FH: target directory */
4381 4399 ndvp = cs->vp;
4382 4400 if (ndvp == NULL) {
4383 4401 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4384 4402 goto out;
4385 4403 }
4386 4404
4387 4405 /* SAVED_FH: from directory */
4388 4406 odvp = cs->saved_vp;
4389 4407 if (odvp == NULL) {
4390 4408 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4391 4409 goto out;
4392 4410 }
4393 4411
4394 4412 if (cs->access == CS_ACCESS_DENIED) {
4395 4413 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4396 4414 goto out;
4397 4415 }
4398 4416
4399 4417 /*
4400 4418 * If there is an unshared filesystem mounted on this vnode,
4401 4419 * do not allow to rename objects in this directory.
4402 4420 */
4403 4421 if (vn_ismntpt(odvp)) {
4404 4422 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4405 4423 goto out;
4406 4424 }
4407 4425
4408 4426 /*
4409 4427 * If there is an unshared filesystem mounted on this vnode,
4410 4428 * do not allow to rename to this directory.
4411 4429 */
4412 4430 if (vn_ismntpt(ndvp)) {
4413 4431 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4414 4432 goto out;
4415 4433 }
4416 4434
4417 4435 if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4418 4436 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4419 4437 goto out;
4420 4438 }
4421 4439
4422 4440 if (cs->saved_exi != cs->exi) {
4423 4441 *cs->statusp = resp->status = NFS4ERR_XDEV;
4424 4442 goto out;
4425 4443 }
4426 4444
4427 4445 status = utf8_dir_verify(&args->oldname);
4428 4446 if (status != NFS4_OK) {
4429 4447 *cs->statusp = resp->status = status;
4430 4448 goto out;
4431 4449 }
4432 4450
4433 4451 status = utf8_dir_verify(&args->newname);
4434 4452 if (status != NFS4_OK) {
4435 4453 *cs->statusp = resp->status = status;
4436 4454 goto out;
4437 4455 }
4438 4456
4439 4457 onm = utf8_to_fn(&args->oldname, &olen, NULL);
4440 4458 if (onm == NULL) {
4441 4459 *cs->statusp = resp->status = NFS4ERR_INVAL;
4442 4460 goto out;
4443 4461 }
4444 4462 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4445 4463 nlen = MAXPATHLEN + 1;
4446 4464 converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4447 4465 nlen);
4448 4466
4449 4467 if (converted_onm == NULL) {
4450 4468 *cs->statusp = resp->status = NFS4ERR_INVAL;
4451 4469 kmem_free(onm, olen);
4452 4470 goto out;
4453 4471 }
4454 4472
4455 4473 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4456 4474 if (nnm == NULL) {
4457 4475 *cs->statusp = resp->status = NFS4ERR_INVAL;
4458 4476 if (onm != converted_onm)
4459 4477 kmem_free(converted_onm, MAXPATHLEN + 1);
4460 4478 kmem_free(onm, olen);
4461 4479 goto out;
4462 4480 }
4463 4481 converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4464 4482 MAXPATHLEN + 1);
4465 4483
4466 4484 if (converted_nnm == NULL) {
4467 4485 *cs->statusp = resp->status = NFS4ERR_INVAL;
4468 4486 kmem_free(nnm, nlen);
4469 4487 nnm = NULL;
4470 4488 if (onm != converted_onm)
4471 4489 kmem_free(converted_onm, MAXPATHLEN + 1);
4472 4490 kmem_free(onm, olen);
4473 4491 goto out;
4474 4492 }
4475 4493
4476 4494
4477 4495 if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4478 4496 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4479 4497 kmem_free(onm, olen);
4480 4498 kmem_free(nnm, nlen);
4481 4499 goto out;
4482 4500 }
4483 4501
4484 4502
4485 4503 if (rdonly4(req, cs)) {
4486 4504 *cs->statusp = resp->status = NFS4ERR_ROFS;
4487 4505 if (onm != converted_onm)
4488 4506 kmem_free(converted_onm, MAXPATHLEN + 1);
4489 4507 kmem_free(onm, olen);
4490 4508 if (nnm != converted_nnm)
4491 4509 kmem_free(converted_nnm, MAXPATHLEN + 1);
4492 4510 kmem_free(nnm, nlen);
4493 4511 goto out;
4494 4512 }
4495 4513
4496 4514 /* check label of the target dir */
4497 4515 if (is_system_labeled()) {
4498 4516 ASSERT(req->rq_label != NULL);
4499 4517 clabel = req->rq_label;
4500 4518 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4501 4519 "got client label from request(1)",
4502 4520 struct svc_req *, req);
4503 4521 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4504 4522 if (!do_rfs_label_check(clabel, ndvp,
4505 4523 EQUALITY_CHECK, cs->exi)) {
4506 4524 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4507 4525 goto err_out;
4508 4526 }
4509 4527 }
4510 4528 }
4511 4529
4512 4530 /*
4513 4531 * Is the source a file and have a delegation?
4514 4532 * We don't need to acquire va_seq before these lookups, if
4515 4533 * it causes an update, cinfo.before will not match, which will
4516 4534 * trigger a cache flush even if atomic is TRUE.
4517 4535 */
4518 4536 if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4519 4537 &error, cs->cr)) {
4520 4538 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4521 4539 NULL)) {
4522 4540 *cs->statusp = resp->status = NFS4ERR_DELAY;
4523 4541 goto err_out;
4524 4542 }
4525 4543 }
4526 4544
4527 4545 if (srcvp == NULL) {
4528 4546 *cs->statusp = resp->status = puterrno4(error);
4529 4547 if (onm != converted_onm)
4530 4548 kmem_free(converted_onm, MAXPATHLEN + 1);
4531 4549 kmem_free(onm, olen);
4532 4550 if (nnm != converted_nnm)
4533 4551 kmem_free(converted_nnm, MAXPATHLEN + 1);
4534 4552 kmem_free(nnm, nlen);
4535 4553 goto out;
4536 4554 }
4537 4555
4538 4556 sfp_rele_grant_hold = 1;
4539 4557
4540 4558 /* Does the destination exist and a file and have a delegation? */
4541 4559 if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4542 4560 NULL, cs->cr)) {
4543 4561 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4544 4562 NULL)) {
4545 4563 *cs->statusp = resp->status = NFS4ERR_DELAY;
4546 4564 goto err_out;
4547 4565 }
4548 4566 }
4549 4567 fp_rele_grant_hold = 1;
4550 4568
4551 4569 /* Check for NBMAND lock on both source and target */
4552 4570 if (nbl_need_check(srcvp)) {
4553 4571 nbl_start_crit(srcvp, RW_READER);
4554 4572 in_crit_src = 1;
4555 4573 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4556 4574 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4557 4575 goto err_out;
4558 4576 }
4559 4577 }
4560 4578
4561 4579 if (targvp && nbl_need_check(targvp)) {
4562 4580 nbl_start_crit(targvp, RW_READER);
4563 4581 in_crit_targ = 1;
4564 4582 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4565 4583 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4566 4584 goto err_out;
4567 4585 }
4568 4586 }
4569 4587
4570 4588 /* Get source "before" change value */
4571 4589 obdva.va_mask = AT_CTIME|AT_SEQ;
4572 4590 error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4573 4591 if (!error) {
4574 4592 nbdva.va_mask = AT_CTIME|AT_SEQ;
4575 4593 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4576 4594 }
4577 4595 if (error) {
4578 4596 *cs->statusp = resp->status = puterrno4(error);
4579 4597 goto err_out;
4580 4598 }
4581 4599
4582 4600 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4583 4601 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4584 4602
4585 4603 error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr,
4586 4604 NULL, 0);
4587 4605
4588 4606 /*
4589 4607 * If target existed and was unlinked by VOP_RENAME, state will need
4590 4608 * closed. To avoid deadlock, rfs4_close_all_state will be done after
4591 4609 * any necessary nbl_end_crit on srcvp and tgtvp.
4592 4610 */
4593 4611 if (error == 0 && fp != NULL) {
4594 4612 rfs4_dbe_lock(fp->rf_dbe);
4595 4613 tvp = fp->rf_vp;
4596 4614 if (tvp)
4597 4615 VN_HOLD(tvp);
4598 4616 rfs4_dbe_unlock(fp->rf_dbe);
4599 4617
4600 4618 if (tvp) {
4601 4619 struct vattr va;
4602 4620 va.va_mask = AT_NLINK;
4603 4621
4604 4622 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4605 4623 va.va_nlink == 0) {
4606 4624 unlinked = 1;
4607 4625
4608 4626 /* DEBUG data */
4609 4627 if ((srcvp == targvp) || (tvp != targvp)) {
4610 4628 cmn_err(CE_WARN, "rfs4_op_rename: "
4611 4629 "srcvp %p, targvp: %p, tvp: %p",
4612 4630 (void *)srcvp, (void *)targvp,
4613 4631 (void *)tvp);
4614 4632 }
4615 4633 } else {
4616 4634 VN_RELE(tvp);
4617 4635 }
4618 4636 }
4619 4637 }
4620 4638 if (error == 0)
4621 4639 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4622 4640
4623 4641 if (in_crit_src)
4624 4642 nbl_end_crit(srcvp);
4625 4643 if (srcvp)
4626 4644 VN_RELE(srcvp);
4627 4645 if (in_crit_targ)
4628 4646 nbl_end_crit(targvp);
4629 4647 if (targvp)
4630 4648 VN_RELE(targvp);
4631 4649
4632 4650 if (unlinked) {
4633 4651 ASSERT(fp != NULL);
4634 4652 ASSERT(tvp != NULL);
4635 4653
4636 4654 /* DEBUG data */
4637 4655 if (RW_READ_HELD(&tvp->v_nbllock)) {
4638 4656 cmn_err(CE_WARN, "rfs4_op_rename: "
4639 4657 "RW_READ_HELD(%p)", (void *)tvp);
4640 4658 }
4641 4659
4642 4660 /* The file is gone and so should the state */
4643 4661 rfs4_close_all_state(fp);
4644 4662 VN_RELE(tvp);
4645 4663 }
4646 4664
4647 4665 if (sfp) {
4648 4666 rfs4_clear_dont_grant(sfp);
4649 4667 rfs4_file_rele(sfp);
4650 4668 }
4651 4669 if (fp) {
4652 4670 rfs4_clear_dont_grant(fp);
4653 4671 rfs4_file_rele(fp);
4654 4672 }
4655 4673
4656 4674 if (converted_onm != onm)
4657 4675 kmem_free(converted_onm, MAXPATHLEN + 1);
4658 4676 kmem_free(onm, olen);
4659 4677 if (converted_nnm != nnm)
4660 4678 kmem_free(converted_nnm, MAXPATHLEN + 1);
4661 4679 kmem_free(nnm, nlen);
4662 4680
4663 4681 /*
4664 4682 * Get the initial "after" sequence number, if it fails, set to zero
4665 4683 */
4666 4684 oidva.va_mask = AT_SEQ;
4667 4685 if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4668 4686 oidva.va_seq = 0;
4669 4687
4670 4688 nidva.va_mask = AT_SEQ;
4671 4689 if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4672 4690 nidva.va_seq = 0;
4673 4691
4674 4692 /*
4675 4693 * Force modified data and metadata out to stable storage.
4676 4694 */
4677 4695 (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4678 4696 (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4679 4697
4680 4698 if (error) {
4681 4699 *cs->statusp = resp->status = puterrno4(error);
4682 4700 goto out;
4683 4701 }
4684 4702
4685 4703 /*
4686 4704 * Get "after" change values, if it fails, simply return the
4687 4705 * before value.
4688 4706 */
4689 4707 oadva.va_mask = AT_CTIME|AT_SEQ;
4690 4708 if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4691 4709 oadva.va_ctime = obdva.va_ctime;
4692 4710 oadva.va_seq = 0;
4693 4711 }
4694 4712
4695 4713 nadva.va_mask = AT_CTIME|AT_SEQ;
4696 4714 if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4697 4715 nadva.va_ctime = nbdva.va_ctime;
4698 4716 nadva.va_seq = 0;
4699 4717 }
4700 4718
4701 4719 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4702 4720 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4703 4721
4704 4722 /*
4705 4723 * The cinfo.atomic = TRUE only if we have
4706 4724 * non-zero va_seq's, and it has incremented by exactly one
4707 4725 * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4708 4726 */
4709 4727 if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4710 4728 oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4711 4729 resp->source_cinfo.atomic = TRUE;
4712 4730 else
4713 4731 resp->source_cinfo.atomic = FALSE;
4714 4732
4715 4733 if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4716 4734 nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4717 4735 resp->target_cinfo.atomic = TRUE;
4718 4736 else
4719 4737 resp->target_cinfo.atomic = FALSE;
4720 4738
4721 4739 #ifdef VOLATILE_FH_TEST
4722 4740 {
4723 4741 extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4724 4742
4725 4743 /*
4726 4744 * Add the renamed file handle to the volatile rename list
4727 4745 */
4728 4746 if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4729 4747 /* file handles may expire on rename */
4730 4748 vnode_t *vp;
4731 4749
4732 4750 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4733 4751 /*
4734 4752 * Already know that nnm will be a valid string
4735 4753 */
4736 4754 error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4737 4755 NULL, NULL, NULL);
4738 4756 kmem_free(nnm, nlen);
4739 4757 if (!error) {
4740 4758 add_volrnm_fh(cs->exi, vp);
4741 4759 VN_RELE(vp);
4742 4760 }
4743 4761 }
4744 4762 }
4745 4763 #endif /* VOLATILE_FH_TEST */
4746 4764
4747 4765 *cs->statusp = resp->status = NFS4_OK;
4748 4766 out:
4749 4767 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4750 4768 RENAME4res *, resp);
4751 4769 return;
4752 4770
4753 4771 err_out:
4754 4772 if (onm != converted_onm)
4755 4773 kmem_free(converted_onm, MAXPATHLEN + 1);
4756 4774 if (onm != NULL)
4757 4775 kmem_free(onm, olen);
4758 4776 if (nnm != converted_nnm)
4759 4777 kmem_free(converted_nnm, MAXPATHLEN + 1);
4760 4778 if (nnm != NULL)
4761 4779 kmem_free(nnm, nlen);
4762 4780
4763 4781 if (in_crit_src) nbl_end_crit(srcvp);
4764 4782 if (in_crit_targ) nbl_end_crit(targvp);
4765 4783 if (targvp) VN_RELE(targvp);
4766 4784 if (srcvp) VN_RELE(srcvp);
4767 4785 if (sfp) {
4768 4786 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4769 4787 rfs4_file_rele(sfp);
4770 4788 }
4771 4789 if (fp) {
4772 4790 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4773 4791 rfs4_file_rele(fp);
4774 4792 }
4775 4793
4776 4794 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4777 4795 RENAME4res *, resp);
4778 4796 }
4779 4797
4780 4798 /* ARGSUSED */
4781 4799 static void
4782 4800 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4783 4801 struct compound_state *cs)
4784 4802 {
4785 4803 RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4786 4804 RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4787 4805 rfs4_client_t *cp;
4788 4806
4789 4807 DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4790 4808 RENEW4args *, args);
4791 4809
4792 4810 if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4793 4811 *cs->statusp = resp->status =
4794 4812 rfs4_check_clientid(&args->clientid, 0);
4795 4813 goto out;
4796 4814 }
4797 4815
4798 4816 if (rfs4_lease_expired(cp)) {
4799 4817 rfs4_client_rele(cp);
4800 4818 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4801 4819 goto out;
4802 4820 }
4803 4821
4804 4822 rfs4_update_lease(cp);
4805 4823
4806 4824 mutex_enter(cp->rc_cbinfo.cb_lock);
4807 4825 if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4808 4826 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4809 4827 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4810 4828 } else {
4811 4829 *cs->statusp = resp->status = NFS4_OK;
4812 4830 }
4813 4831 mutex_exit(cp->rc_cbinfo.cb_lock);
4814 4832
4815 4833 rfs4_client_rele(cp);
4816 4834
4817 4835 out:
4818 4836 DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4819 4837 RENEW4res *, resp);
4820 4838 }
4821 4839
4822 4840 /* ARGSUSED */
4823 4841 static void
4824 4842 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4825 4843 struct compound_state *cs)
4826 4844 {
4827 4845 RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4828 4846
4829 4847 DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4830 4848
4831 4849 /* No need to check cs->access - we are not accessing any object */
4832 4850 if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4833 4851 *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4834 4852 goto out;
4835 4853 }
4836 4854 if (cs->vp != NULL) {
4837 4855 VN_RELE(cs->vp);
4838 4856 }
4839 4857 cs->vp = cs->saved_vp;
4840 4858 cs->saved_vp = NULL;
4841 4859 cs->exi = cs->saved_exi;
4842 4860 nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4843 4861 *cs->statusp = resp->status = NFS4_OK;
4844 4862 cs->deleg = FALSE;
4845 4863
4846 4864 out:
4847 4865 DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4848 4866 RESTOREFH4res *, resp);
4849 4867 }
4850 4868
4851 4869 /* ARGSUSED */
4852 4870 static void
4853 4871 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4854 4872 struct compound_state *cs)
4855 4873 {
4856 4874 SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4857 4875
4858 4876 DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4859 4877
4860 4878 /* No need to check cs->access - we are not accessing any object */
4861 4879 if (cs->vp == NULL) {
4862 4880 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4863 4881 goto out;
4864 4882 }
4865 4883 if (cs->saved_vp != NULL) {
4866 4884 VN_RELE(cs->saved_vp);
4867 4885 }
4868 4886 cs->saved_vp = cs->vp;
4869 4887 VN_HOLD(cs->saved_vp);
4870 4888 cs->saved_exi = cs->exi;
4871 4889 /*
4872 4890 * since SAVEFH is fairly rare, don't alloc space for its fh
4873 4891 * unless necessary.
4874 4892 */
4875 4893 if (cs->saved_fh.nfs_fh4_val == NULL) {
4876 4894 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4877 4895 }
4878 4896 nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4879 4897 *cs->statusp = resp->status = NFS4_OK;
4880 4898
4881 4899 out:
4882 4900 DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4883 4901 SAVEFH4res *, resp);
4884 4902 }
4885 4903
4886 4904 /*
4887 4905 * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4888 4906 * return the bitmap of attrs that were set successfully. It is also
4889 4907 * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4890 4908 * always be called only after rfs4_do_set_attrs().
4891 4909 *
4892 4910 * Verify that the attributes are same as the expected ones. sargp->vap
4893 4911 * and sargp->sbp contain the input attributes as translated from fattr4.
4894 4912 *
4895 4913 * This function verifies only the attrs that correspond to a vattr or
4896 4914 * vfsstat struct. That is because of the extra step needed to get the
4897 4915 * corresponding system structs. Other attributes have already been set or
4898 4916 * verified by do_rfs4_set_attrs.
4899 4917 *
4900 4918 * Return 0 if all attrs match, -1 if some don't, error if error processing.
4901 4919 */
4902 4920 static int
4903 4921 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4904 4922 bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4905 4923 {
4906 4924 int error, ret_error = 0;
4907 4925 int i, k;
4908 4926 uint_t sva_mask = sargp->vap->va_mask;
4909 4927 uint_t vbit;
4910 4928 union nfs4_attr_u *na;
4911 4929 uint8_t *amap;
4912 4930 bool_t getsb = ntovp->vfsstat;
4913 4931
4914 4932 if (sva_mask != 0) {
4915 4933 /*
4916 4934 * Okay to overwrite sargp->vap because we verify based
4917 4935 * on the incoming values.
4918 4936 */
4919 4937 ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4920 4938 sargp->cs->cr, NULL);
4921 4939 if (ret_error) {
4922 4940 if (resp == NULL)
4923 4941 return (ret_error);
4924 4942 /*
4925 4943 * Must return bitmap of successful attrs
4926 4944 */
4927 4945 sva_mask = 0; /* to prevent checking vap later */
4928 4946 } else {
4929 4947 /*
4930 4948 * Some file systems clobber va_mask. it is probably
4931 4949 * wrong of them to do so, nonethless we practice
4932 4950 * defensive coding.
4933 4951 * See bug id 4276830.
4934 4952 */
4935 4953 sargp->vap->va_mask = sva_mask;
4936 4954 }
4937 4955 }
4938 4956
4939 4957 if (getsb) {
4940 4958 /*
4941 4959 * Now get the superblock and loop on the bitmap, as there is
4942 4960 * no simple way of translating from superblock to bitmap4.
4943 4961 */
4944 4962 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4945 4963 if (ret_error) {
4946 4964 if (resp == NULL)
4947 4965 goto errout;
4948 4966 getsb = FALSE;
4949 4967 }
4950 4968 }
4951 4969
4952 4970 /*
4953 4971 * Now loop and verify each attribute which getattr returned
4954 4972 * whether it's the same as the input.
4955 4973 */
4956 4974 if (resp == NULL && !getsb && (sva_mask == 0))
4957 4975 goto errout;
4958 4976
4959 4977 na = ntovp->na;
4960 4978 amap = ntovp->amap;
4961 4979 k = 0;
4962 4980 for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4963 4981 k = *amap;
4964 4982 ASSERT(nfs4_ntov_map[k].nval == k);
4965 4983 vbit = nfs4_ntov_map[k].vbit;
4966 4984
4967 4985 /*
4968 4986 * If vattr attribute but VOP_GETATTR failed, or it's
4969 4987 * superblock attribute but VFS_STATVFS failed, skip
4970 4988 */
4971 4989 if (vbit) {
4972 4990 if ((vbit & sva_mask) == 0)
4973 4991 continue;
4974 4992 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4975 4993 continue;
4976 4994 }
4977 4995 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
4978 4996 if (resp != NULL) {
4979 4997 if (error)
4980 4998 ret_error = -1; /* not all match */
4981 4999 else /* update response bitmap */
4982 5000 *resp |= nfs4_ntov_map[k].fbit;
4983 5001 continue;
4984 5002 }
4985 5003 if (error) {
4986 5004 ret_error = -1; /* not all match */
4987 5005 break;
4988 5006 }
4989 5007 }
4990 5008 errout:
4991 5009 return (ret_error);
4992 5010 }
4993 5011
4994 5012 /*
4995 5013 * Decode the attribute to be set/verified. If the attr requires a sys op
4996 5014 * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
4997 5015 * call the sv_getit function for it, because the sys op hasn't yet been done.
4998 5016 * Return 0 for success, error code if failed.
4999 5017 *
5000 5018 * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
5001 5019 */
5002 5020 static int
5003 5021 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
5004 5022 int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
5005 5023 {
5006 5024 int error = 0;
5007 5025 bool_t set_later;
5008 5026
5009 5027 sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
5010 5028
5011 5029 if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
5012 5030 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
5013 5031 /*
5014 5032 * don't verify yet if a vattr or sb dependent attr,
5015 5033 * because we don't have their sys values yet.
5016 5034 * Will be done later.
5017 5035 */
5018 5036 if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
5019 5037 /*
5020 5038 * ACLs are a special case, since setting the MODE
5021 5039 * conflicts with setting the ACL. We delay setting
5022 5040 * the ACL until all other attributes have been set.
5023 5041 * The ACL gets set in do_rfs4_op_setattr().
5024 5042 */
5025 5043 if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5026 5044 error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5027 5045 sargp, nap);
5028 5046 if (error) {
5029 5047 xdr_free(nfs4_ntov_map[k].xfunc,
5030 5048 (caddr_t)nap);
5031 5049 }
5032 5050 }
5033 5051 }
5034 5052 } else {
5035 5053 #ifdef DEBUG
5036 5054 cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5037 5055 "decoding attribute %d\n", k);
5038 5056 #endif
5039 5057 error = EINVAL;
5040 5058 }
5041 5059 if (!error && resp_bval && !set_later) {
5042 5060 *resp_bval |= nfs4_ntov_map[k].fbit;
5043 5061 }
5044 5062
5045 5063 return (error);
5046 5064 }
5047 5065
5048 5066 /*
5049 5067 * Set vattr based on incoming fattr4 attrs - used by setattr.
5050 5068 * Set response mask. Ignore any values that are not writable vattr attrs.
5051 5069 */
5052 5070 static nfsstat4
5053 5071 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5054 5072 struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5055 5073 nfs4_attr_cmd_t cmd)
5056 5074 {
5057 5075 int error = 0;
5058 5076 int i;
5059 5077 char *attrs = fattrp->attrlist4;
5060 5078 uint32_t attrslen = fattrp->attrlist4_len;
5061 5079 XDR xdr;
5062 5080 nfsstat4 status = NFS4_OK;
5063 5081 vnode_t *vp = cs->vp;
5064 5082 union nfs4_attr_u *na;
5065 5083 uint8_t *amap;
5066 5084
5067 5085 #ifndef lint
5068 5086 /*
5069 5087 * Make sure that maximum attribute number can be expressed as an
5070 5088 * 8 bit quantity.
5071 5089 */
5072 5090 ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5073 5091 #endif
5074 5092
5075 5093 if (vp == NULL) {
5076 5094 if (resp)
5077 5095 *resp = 0;
5078 5096 return (NFS4ERR_NOFILEHANDLE);
5079 5097 }
5080 5098 if (cs->access == CS_ACCESS_DENIED) {
5081 5099 if (resp)
5082 5100 *resp = 0;
5083 5101 return (NFS4ERR_ACCESS);
5084 5102 }
5085 5103
5086 5104 sargp->op = cmd;
5087 5105 sargp->cs = cs;
5088 5106 sargp->flag = 0; /* may be set later */
5089 5107 sargp->vap->va_mask = 0;
5090 5108 sargp->rdattr_error = NFS4_OK;
5091 5109 sargp->rdattr_error_req = FALSE;
5092 5110 /* sargp->sbp is set by the caller */
5093 5111
5094 5112 xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5095 5113
5096 5114 na = ntovp->na;
5097 5115 amap = ntovp->amap;
5098 5116
5099 5117 /*
5100 5118 * The following loop iterates on the nfs4_ntov_map checking
5101 5119 * if the fbit is set in the requested bitmap.
5102 5120 * If set then we process the arguments using the
5103 5121 * rfs4_fattr4 conversion functions to populate the setattr
5104 5122 * vattr and va_mask. Any settable attrs that are not using vattr
5105 5123 * will be set in this loop.
5106 5124 */
5107 5125 for (i = 0; i < nfs4_ntov_map_size; i++) {
5108 5126 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5109 5127 continue;
5110 5128 }
5111 5129 /*
5112 5130 * If setattr, must be a writable attr.
5113 5131 * If verify/nverify, must be a readable attr.
5114 5132 */
5115 5133 if ((error = (*nfs4_ntov_map[i].sv_getit)(
5116 5134 NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5117 5135 /*
5118 5136 * Client tries to set/verify an
5119 5137 * unsupported attribute, tries to set
5120 5138 * a read only attr or verify a write
5121 5139 * only one - error!
5122 5140 */
5123 5141 break;
5124 5142 }
5125 5143 /*
5126 5144 * Decode the attribute to set/verify
5127 5145 */
5128 5146 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5129 5147 &xdr, resp ? resp : NULL, na);
5130 5148 if (error)
5131 5149 break;
5132 5150 *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5133 5151 na++;
5134 5152 (ntovp->attrcnt)++;
5135 5153 if (nfs4_ntov_map[i].vfsstat)
5136 5154 ntovp->vfsstat = TRUE;
5137 5155 }
5138 5156
5139 5157 if (error != 0)
5140 5158 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5141 5159 puterrno4(error));
5142 5160 /* xdrmem_destroy(&xdrs); */ /* NO-OP */
5143 5161 return (status);
5144 5162 }
5145 5163
5146 5164 static nfsstat4
5147 5165 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5148 5166 stateid4 *stateid)
5149 5167 {
5150 5168 int error = 0;
5151 5169 struct nfs4_svgetit_arg sarg;
5152 5170 bool_t trunc;
5153 5171
5154 5172 nfsstat4 status = NFS4_OK;
5155 5173 cred_t *cr = cs->cr;
5156 5174 vnode_t *vp = cs->vp;
5157 5175 struct nfs4_ntov_table ntov;
5158 5176 struct statvfs64 sb;
5159 5177 struct vattr bva;
5160 5178 struct flock64 bf;
5161 5179 int in_crit = 0;
5162 5180 uint_t saved_mask = 0;
5163 5181 caller_context_t ct;
5164 5182
5165 5183 *resp = 0;
5166 5184 sarg.sbp = &sb;
5167 5185 sarg.is_referral = B_FALSE;
5168 5186 nfs4_ntov_table_init(&ntov);
5169 5187 status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5170 5188 NFS4ATTR_SETIT);
5171 5189 if (status != NFS4_OK) {
5172 5190 /*
5173 5191 * failed set attrs
5174 5192 */
5175 5193 goto done;
5176 5194 }
5177 5195 if ((sarg.vap->va_mask == 0) &&
5178 5196 (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5179 5197 /*
5180 5198 * no further work to be done
5181 5199 */
5182 5200 goto done;
5183 5201 }
5184 5202
5185 5203 /*
5186 5204 * If we got a request to set the ACL and the MODE, only
5187 5205 * allow changing VSUID, VSGID, and VSVTX. Attempting
5188 5206 * to change any other bits, along with setting an ACL,
5189 5207 * gives NFS4ERR_INVAL.
5190 5208 */
5191 5209 if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5192 5210 (fattrp->attrmask & FATTR4_MODE_MASK)) {
5193 5211 vattr_t va;
5194 5212
5195 5213 va.va_mask = AT_MODE;
5196 5214 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5197 5215 if (error) {
5198 5216 status = puterrno4(error);
5199 5217 goto done;
5200 5218 }
5201 5219 if ((sarg.vap->va_mode ^ va.va_mode) &
5202 5220 ~(VSUID | VSGID | VSVTX)) {
5203 5221 status = NFS4ERR_INVAL;
5204 5222 goto done;
5205 5223 }
5206 5224 }
5207 5225
5208 5226 /* Check stateid only if size has been set */
5209 5227 if (sarg.vap->va_mask & AT_SIZE) {
5210 5228 trunc = (sarg.vap->va_size == 0);
5211 5229 status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5212 5230 trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
5213 5231 if (status != NFS4_OK)
5214 5232 goto done;
5215 5233 } else {
5216 5234 ct.cc_sysid = 0;
5217 5235 ct.cc_pid = 0;
5218 5236 ct.cc_caller_id = nfs4_srv_caller_id;
5219 5237 ct.cc_flags = CC_DONTBLOCK;
5220 5238 }
5221 5239
5222 5240 /* XXX start of possible race with delegations */
5223 5241
5224 5242 /*
5225 5243 * We need to specially handle size changes because it is
5226 5244 * possible for the client to create a file with read-only
5227 5245 * modes, but with the file opened for writing. If the client
5228 5246 * then tries to set the file size, e.g. ftruncate(3C),
5229 5247 * fcntl(F_FREESP), the normal access checking done in
5230 5248 * VOP_SETATTR would prevent the client from doing it even though
5231 5249 * it should be allowed to do so. To get around this, we do the
5232 5250 * access checking for ourselves and use VOP_SPACE which doesn't
5233 5251 * do the access checking.
5234 5252 * Also the client should not be allowed to change the file
5235 5253 * size if there is a conflicting non-blocking mandatory lock in
5236 5254 * the region of the change.
5237 5255 */
5238 5256 if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5239 5257 u_offset_t offset;
5240 5258 ssize_t length;
5241 5259
5242 5260 /*
5243 5261 * ufs_setattr clears AT_SIZE from vap->va_mask, but
5244 5262 * before returning, sarg.vap->va_mask is used to
5245 5263 * generate the setattr reply bitmap. We also clear
5246 5264 * AT_SIZE below before calling VOP_SPACE. For both
5247 5265 * of these cases, the va_mask needs to be saved here
5248 5266 * and restored after calling VOP_SETATTR.
5249 5267 */
5250 5268 saved_mask = sarg.vap->va_mask;
5251 5269
5252 5270 /*
5253 5271 * Check any possible conflict due to NBMAND locks.
5254 5272 * Get into critical region before VOP_GETATTR, so the
5255 5273 * size attribute is valid when checking conflicts.
5256 5274 */
5257 5275 if (nbl_need_check(vp)) {
5258 5276 nbl_start_crit(vp, RW_READER);
5259 5277 in_crit = 1;
5260 5278 }
5261 5279
5262 5280 bva.va_mask = AT_UID|AT_SIZE;
5263 5281 if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
5264 5282 status = puterrno4(error);
5265 5283 goto done;
5266 5284 }
5267 5285
5268 5286 if (in_crit) {
5269 5287 if (sarg.vap->va_size < bva.va_size) {
5270 5288 offset = sarg.vap->va_size;
5271 5289 length = bva.va_size - sarg.vap->va_size;
5272 5290 } else {
5273 5291 offset = bva.va_size;
5274 5292 length = sarg.vap->va_size - bva.va_size;
5275 5293 }
5276 5294 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5277 5295 &ct)) {
5278 5296 status = NFS4ERR_LOCKED;
5279 5297 goto done;
5280 5298 }
5281 5299 }
5282 5300
5283 5301 if (crgetuid(cr) == bva.va_uid) {
5284 5302 sarg.vap->va_mask &= ~AT_SIZE;
5285 5303 bf.l_type = F_WRLCK;
5286 5304 bf.l_whence = 0;
5287 5305 bf.l_start = (off64_t)sarg.vap->va_size;
5288 5306 bf.l_len = 0;
5289 5307 bf.l_sysid = 0;
5290 5308 bf.l_pid = 0;
5291 5309 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5292 5310 (offset_t)sarg.vap->va_size, cr, &ct);
5293 5311 }
5294 5312 }
5295 5313
5296 5314 if (!error && sarg.vap->va_mask != 0)
5297 5315 error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5298 5316
5299 5317 /* restore va_mask -- ufs_setattr clears AT_SIZE */
5300 5318 if (saved_mask & AT_SIZE)
5301 5319 sarg.vap->va_mask |= AT_SIZE;
5302 5320
5303 5321 /*
5304 5322 * If an ACL was being set, it has been delayed until now,
5305 5323 * in order to set the mode (via the VOP_SETATTR() above) first.
5306 5324 */
5307 5325 if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5308 5326 int i;
5309 5327
5310 5328 for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5311 5329 if (ntov.amap[i] == FATTR4_ACL)
5312 5330 break;
5313 5331 if (i < NFS4_MAXNUM_ATTRS) {
5314 5332 error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5315 5333 NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5316 5334 if (error == 0) {
5317 5335 *resp |= FATTR4_ACL_MASK;
5318 5336 } else if (error == ENOTSUP) {
5319 5337 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5320 5338 status = NFS4ERR_ATTRNOTSUPP;
5321 5339 goto done;
5322 5340 }
5323 5341 } else {
5324 5342 NFS4_DEBUG(rfs4_debug,
5325 5343 (CE_NOTE, "do_rfs4_op_setattr: "
5326 5344 "unable to find ACL in fattr4"));
5327 5345 error = EINVAL;
5328 5346 }
5329 5347 }
5330 5348
5331 5349 if (error) {
5332 5350 /* check if a monitor detected a delegation conflict */
5333 5351 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5334 5352 status = NFS4ERR_DELAY;
5335 5353 else
5336 5354 status = puterrno4(error);
5337 5355
5338 5356 /*
5339 5357 * Set the response bitmap when setattr failed.
5340 5358 * If VOP_SETATTR partially succeeded, test by doing a
5341 5359 * VOP_GETATTR on the object and comparing the data
5342 5360 * to the setattr arguments.
5343 5361 */
5344 5362 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5345 5363 } else {
5346 5364 /*
5347 5365 * Force modified metadata out to stable storage.
5348 5366 */
5349 5367 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5350 5368 /*
5351 5369 * Set response bitmap
5352 5370 */
5353 5371 nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5354 5372 }
5355 5373
5356 5374 /* Return early and already have a NFSv4 error */
5357 5375 done:
5358 5376 /*
5359 5377 * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5360 5378 * conversion sets both readable and writeable NFS4 attrs
5361 5379 * for AT_MTIME and AT_ATIME. The line below masks out
5362 5380 * unrequested attrs from the setattr result bitmap. This
5363 5381 * is placed after the done: label to catch the ATTRNOTSUP
5364 5382 * case.
5365 5383 */
5366 5384 *resp &= fattrp->attrmask;
5367 5385
5368 5386 if (in_crit)
5369 5387 nbl_end_crit(vp);
5370 5388
5371 5389 nfs4_ntov_table_free(&ntov, &sarg);
5372 5390
5373 5391 return (status);
5374 5392 }
5375 5393
5376 5394 /* ARGSUSED */
5377 5395 static void
5378 5396 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5379 5397 struct compound_state *cs)
5380 5398 {
5381 5399 SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5382 5400 SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5383 5401 bslabel_t *clabel;
5384 5402
5385 5403 DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5386 5404 SETATTR4args *, args);
5387 5405
5388 5406 if (cs->vp == NULL) {
5389 5407 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5390 5408 goto out;
5391 5409 }
5392 5410
5393 5411 /*
5394 5412 * If there is an unshared filesystem mounted on this vnode,
5395 5413 * do not allow to setattr on this vnode.
5396 5414 */
5397 5415 if (vn_ismntpt(cs->vp)) {
5398 5416 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5399 5417 goto out;
5400 5418 }
5401 5419
5402 5420 resp->attrsset = 0;
5403 5421
5404 5422 if (rdonly4(req, cs)) {
5405 5423 *cs->statusp = resp->status = NFS4ERR_ROFS;
5406 5424 goto out;
5407 5425 }
5408 5426
5409 5427 /* check label before setting attributes */
5410 5428 if (is_system_labeled()) {
5411 5429 ASSERT(req->rq_label != NULL);
5412 5430 clabel = req->rq_label;
5413 5431 DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5414 5432 "got client label from request(1)",
5415 5433 struct svc_req *, req);
5416 5434 if (!blequal(&l_admin_low->tsl_label, clabel)) {
5417 5435 if (!do_rfs_label_check(clabel, cs->vp,
5418 5436 EQUALITY_CHECK, cs->exi)) {
5419 5437 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5420 5438 goto out;
5421 5439 }
5422 5440 }
5423 5441 }
5424 5442
5425 5443 *cs->statusp = resp->status =
5426 5444 do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5427 5445 &args->stateid);
5428 5446
5429 5447 out:
5430 5448 DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5431 5449 SETATTR4res *, resp);
5432 5450 }
5433 5451
5434 5452 /* ARGSUSED */
5435 5453 static void
5436 5454 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5437 5455 struct compound_state *cs)
5438 5456 {
5439 5457 /*
5440 5458 * verify and nverify are exactly the same, except that nverify
5441 5459 * succeeds when some argument changed, and verify succeeds when
5442 5460 * when none changed.
5443 5461 */
5444 5462
5445 5463 VERIFY4args *args = &argop->nfs_argop4_u.opverify;
5446 5464 VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5447 5465
5448 5466 int error;
5449 5467 struct nfs4_svgetit_arg sarg;
5450 5468 struct statvfs64 sb;
5451 5469 struct nfs4_ntov_table ntov;
5452 5470
5453 5471 DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5454 5472 VERIFY4args *, args);
5455 5473
5456 5474 if (cs->vp == NULL) {
5457 5475 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5458 5476 goto out;
5459 5477 }
5460 5478
5461 5479 sarg.sbp = &sb;
5462 5480 sarg.is_referral = B_FALSE;
5463 5481 nfs4_ntov_table_init(&ntov);
5464 5482 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5465 5483 &sarg, &ntov, NFS4ATTR_VERIT);
5466 5484 if (resp->status != NFS4_OK) {
5467 5485 /*
5468 5486 * do_rfs4_set_attrs will try to verify systemwide attrs,
5469 5487 * so could return -1 for "no match".
5470 5488 */
5471 5489 if (resp->status == -1)
5472 5490 resp->status = NFS4ERR_NOT_SAME;
5473 5491 goto done;
5474 5492 }
5475 5493 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5476 5494 switch (error) {
5477 5495 case 0:
5478 5496 resp->status = NFS4_OK;
5479 5497 break;
5480 5498 case -1:
5481 5499 resp->status = NFS4ERR_NOT_SAME;
5482 5500 break;
5483 5501 default:
5484 5502 resp->status = puterrno4(error);
5485 5503 break;
5486 5504 }
5487 5505 done:
5488 5506 *cs->statusp = resp->status;
5489 5507 nfs4_ntov_table_free(&ntov, &sarg);
5490 5508 out:
5491 5509 DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5492 5510 VERIFY4res *, resp);
5493 5511 }
5494 5512
5495 5513 /* ARGSUSED */
5496 5514 static void
5497 5515 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5498 5516 struct compound_state *cs)
5499 5517 {
5500 5518 /*
5501 5519 * verify and nverify are exactly the same, except that nverify
5502 5520 * succeeds when some argument changed, and verify succeeds when
5503 5521 * when none changed.
5504 5522 */
5505 5523
5506 5524 NVERIFY4args *args = &argop->nfs_argop4_u.opnverify;
5507 5525 NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5508 5526
5509 5527 int error;
5510 5528 struct nfs4_svgetit_arg sarg;
5511 5529 struct statvfs64 sb;
5512 5530 struct nfs4_ntov_table ntov;
5513 5531
5514 5532 DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5515 5533 NVERIFY4args *, args);
5516 5534
5517 5535 if (cs->vp == NULL) {
5518 5536 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5519 5537 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5520 5538 NVERIFY4res *, resp);
5521 5539 return;
5522 5540 }
5523 5541 sarg.sbp = &sb;
5524 5542 sarg.is_referral = B_FALSE;
5525 5543 nfs4_ntov_table_init(&ntov);
5526 5544 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5527 5545 &sarg, &ntov, NFS4ATTR_VERIT);
5528 5546 if (resp->status != NFS4_OK) {
5529 5547 /*
5530 5548 * do_rfs4_set_attrs will try to verify systemwide attrs,
5531 5549 * so could return -1 for "no match".
5532 5550 */
5533 5551 if (resp->status == -1)
5534 5552 resp->status = NFS4_OK;
5535 5553 goto done;
5536 5554 }
5537 5555 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5538 5556 switch (error) {
5539 5557 case 0:
5540 5558 resp->status = NFS4ERR_SAME;
5541 5559 break;
5542 5560 case -1:
5543 5561 resp->status = NFS4_OK;
5544 5562 break;
5545 5563 default:
5546 5564 resp->status = puterrno4(error);
5547 5565 break;
5548 5566 }
5549 5567 done:
5550 5568 *cs->statusp = resp->status;
5551 5569 nfs4_ntov_table_free(&ntov, &sarg);
5552 5570
5553 5571 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5554 5572 NVERIFY4res *, resp);
5555 5573 }
5556 5574
5557 5575 /*
5558 5576 * XXX - This should live in an NFS header file.
5559 5577 */
5560 5578 #define MAX_IOVECS 12
5561 5579
5562 5580 /* ARGSUSED */
5563 5581 static void
5564 5582 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5565 5583 struct compound_state *cs)
5566 5584 {
5567 5585 WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5568 5586 WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5569 5587 int error;
5570 5588 vnode_t *vp;
5571 5589 struct vattr bva;
5572 5590 u_offset_t rlimit;
5573 5591 struct uio uio;
5574 5592 struct iovec iov[MAX_IOVECS];
5575 5593 struct iovec *iovp;
5576 5594 int iovcnt;
5577 5595 int ioflag;
5578 5596 cred_t *savecred, *cr;
5579 5597 bool_t *deleg = &cs->deleg;
5580 5598 nfsstat4 stat;
5581 5599 int in_crit = 0;
5582 5600 caller_context_t ct;
5583 5601 nfs4_srv_t *nsrv4;
5584 5602
5585 5603 DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5586 5604 WRITE4args *, args);
5587 5605
5588 5606 vp = cs->vp;
5589 5607 if (vp == NULL) {
5590 5608 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5591 5609 goto out;
5592 5610 }
5593 5611 if (cs->access == CS_ACCESS_DENIED) {
5594 5612 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5595 5613 goto out;
5596 5614 }
5597 5615
5598 5616 cr = cs->cr;
5599 5617
5600 5618 if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5601 5619 deleg, TRUE, &ct)) != NFS4_OK) {
5602 5620 *cs->statusp = resp->status = stat;
5603 5621 goto out;
5604 5622 }
5605 5623
5606 5624 /*
5607 5625 * We have to enter the critical region before calling VOP_RWLOCK
5608 5626 * to avoid a deadlock with ufs.
5609 5627 */
5610 5628 if (nbl_need_check(vp)) {
5611 5629 nbl_start_crit(vp, RW_READER);
5612 5630 in_crit = 1;
5613 5631 if (nbl_conflict(vp, NBL_WRITE,
5614 5632 args->offset, args->data_len, 0, &ct)) {
5615 5633 *cs->statusp = resp->status = NFS4ERR_LOCKED;
5616 5634 goto out;
5617 5635 }
5618 5636 }
5619 5637
5620 5638 bva.va_mask = AT_MODE | AT_UID;
5621 5639 error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5622 5640
5623 5641 /*
5624 5642 * If we can't get the attributes, then we can't do the
5625 5643 * right access checking. So, we'll fail the request.
5626 5644 */
5627 5645 if (error) {
5628 5646 *cs->statusp = resp->status = puterrno4(error);
5629 5647 goto out;
5630 5648 }
5631 5649
5632 5650 if (rdonly4(req, cs)) {
5633 5651 *cs->statusp = resp->status = NFS4ERR_ROFS;
5634 5652 goto out;
5635 5653 }
5636 5654
5637 5655 if (vp->v_type != VREG) {
5638 5656 *cs->statusp = resp->status =
5639 5657 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5640 5658 goto out;
5641 5659 }
5642 5660
5643 5661 if (crgetuid(cr) != bva.va_uid &&
5644 5662 (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5645 5663 *cs->statusp = resp->status = puterrno4(error);
5646 5664 goto out;
5647 5665 }
5648 5666
5649 5667 if (MANDLOCK(vp, bva.va_mode)) {
5650 5668 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5651 5669 goto out;
5652 5670 }
5653 5671
5654 5672 nsrv4 = nfs4_get_srv();
5655 5673 if (args->data_len == 0) {
5656 5674 *cs->statusp = resp->status = NFS4_OK;
5657 5675 resp->count = 0;
5658 5676 resp->committed = args->stable;
5659 5677 resp->writeverf = nsrv4->write4verf;
5660 5678 goto out;
5661 5679 }
5662 5680
5663 5681 if (args->mblk != NULL) {
5664 5682 mblk_t *m;
5665 5683 uint_t bytes, round_len;
5666 5684
5667 5685 iovcnt = 0;
5668 5686 bytes = 0;
5669 5687 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5670 5688 for (m = args->mblk;
5671 5689 m != NULL && bytes < round_len;
5672 5690 m = m->b_cont) {
5673 5691 iovcnt++;
5674 5692 bytes += MBLKL(m);
5675 5693 }
5676 5694 #ifdef DEBUG
5677 5695 /* should have ended on an mblk boundary */
5678 5696 if (bytes != round_len) {
5679 5697 printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5680 5698 bytes, round_len, args->data_len);
5681 5699 printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5682 5700 (void *)args->mblk, (void *)m);
5683 5701 ASSERT(bytes == round_len);
5684 5702 }
5685 5703 #endif
5686 5704 if (iovcnt <= MAX_IOVECS) {
5687 5705 iovp = iov;
5688 5706 } else {
5689 5707 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5690 5708 }
5691 5709 mblk_to_iov(args->mblk, iovcnt, iovp);
5692 5710 } else if (args->rlist != NULL) {
5693 5711 iovcnt = 1;
5694 5712 iovp = iov;
5695 5713 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5696 5714 iovp->iov_len = args->data_len;
5697 5715 } else {
5698 5716 iovcnt = 1;
5699 5717 iovp = iov;
5700 5718 iovp->iov_base = args->data_val;
5701 5719 iovp->iov_len = args->data_len;
5702 5720 }
5703 5721
5704 5722 uio.uio_iov = iovp;
5705 5723 uio.uio_iovcnt = iovcnt;
5706 5724
5707 5725 uio.uio_segflg = UIO_SYSSPACE;
5708 5726 uio.uio_extflg = UIO_COPY_DEFAULT;
5709 5727 uio.uio_loffset = args->offset;
5710 5728 uio.uio_resid = args->data_len;
5711 5729 uio.uio_llimit = curproc->p_fsz_ctl;
5712 5730 rlimit = uio.uio_llimit - args->offset;
5713 5731 if (rlimit < (u_offset_t)uio.uio_resid)
5714 5732 uio.uio_resid = (int)rlimit;
5715 5733
5716 5734 if (args->stable == UNSTABLE4)
5717 5735 ioflag = 0;
5718 5736 else if (args->stable == FILE_SYNC4)
5719 5737 ioflag = FSYNC;
5720 5738 else if (args->stable == DATA_SYNC4)
5721 5739 ioflag = FDSYNC;
5722 5740 else {
5723 5741 if (iovp != iov)
5724 5742 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5725 5743 *cs->statusp = resp->status = NFS4ERR_INVAL;
5726 5744 goto out;
5727 5745 }
5728 5746
5729 5747 /*
5730 5748 * We're changing creds because VM may fault and we need
5731 5749 * the cred of the current thread to be used if quota
5732 5750 * checking is enabled.
5733 5751 */
5734 5752 savecred = curthread->t_cred;
5735 5753 curthread->t_cred = cr;
5736 5754 error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5737 5755 curthread->t_cred = savecred;
5738 5756
5739 5757 if (iovp != iov)
5740 5758 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5741 5759
5742 5760 if (error) {
5743 5761 *cs->statusp = resp->status = puterrno4(error);
5744 5762 goto out;
5745 5763 }
5746 5764
5747 5765 *cs->statusp = resp->status = NFS4_OK;
5748 5766 resp->count = args->data_len - uio.uio_resid;
5749 5767
5750 5768 if (ioflag == 0)
5751 5769 resp->committed = UNSTABLE4;
5752 5770 else
5753 5771 resp->committed = FILE_SYNC4;
5754 5772
5755 5773 resp->writeverf = nsrv4->write4verf;
5756 5774
5757 5775 out:
5758 5776 if (in_crit)
5759 5777 nbl_end_crit(vp);
5760 5778
5761 5779 DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5762 5780 WRITE4res *, resp);
5763 5781 }
5764 5782
5765 5783
5766 5784 /* XXX put in a header file */
5767 5785 extern int sec_svc_getcred(struct svc_req *, cred_t *, caddr_t *, int *);
5768 5786
/*
 * Execute one NFSv4 COMPOUND request.
 *
 * args - decoded compound arguments (tag, minorversion, op array)
 * resp - result structure filled in here; the tag and results array are
 *        allocated with kmem and are expected to be released later via
 *        rfs4_compound_free()
 * exi  - asserted to be NULL on entry (after the early-exit checks)
 * req  - the RPC request; its rq_label is freed before returning normally
 * cr   - asserted to be NULL on entry; a fresh cred is obtained with crget()
 * rv   - optional; set to 0 on entry and to 1 only when sec_svc_getcred()
 *        fails and svcerr_badcred() has been issued
 *
 * Each op is dispatched through rfsv4disptab[] while exported_lock is held
 * as reader.  Processing stops at the first op whose status is not NFS4_OK
 * and the results array is shrunk to the ops actually processed.
 */
5769 5787 void
5770 5788 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5771 5789 struct svc_req *req, cred_t *cr, int *rv)
5772 5790 {
5773 5791 uint_t i;
5774 5792 struct compound_state cs;
5775 5793 nfs4_srv_t *nsrv4;
5776 5794 nfs_export_t *ne = nfs_get_export();
5777 5795
5778 5796 if (rv != NULL)
5779 5797 *rv = 0;
5780 5798 rfs4_init_compound_state(&cs);
5781 5799 /*
5782 5800 * Form a reply tag by copying over the request tag.
5783 5801 */
5784 5802 resp->tag.utf8string_val =
5785 5803 kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5786 5804 resp->tag.utf8string_len = args->tag.utf8string_len;
5787 5805 bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5788 5806 resp->tag.utf8string_len);
5789 5807
5790 5808 cs.statusp = &resp->status;
5791 5809 cs.req = req;
5792 5810 resp->array = NULL;
5793 5811 resp->array_len = 0;
5794 5812
5795 5813 /*
5796 5814 * XXX for now, minorversion should be zero
5797 5815 */
5798 5816 if (args->minorversion != NFS4_MINORVERSION) {
5799 5817 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5800 5818 &cs, COMPOUND4args *, args);
5801 5819 resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5802 5820 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5803 5821 &cs, COMPOUND4res *, resp);
5804 5822 return;
5805 5823 }
5806 5824
/*
 * An empty compound succeeds trivially.  Note that neither the
 * compound__start nor the compound__done probe fires on this path.
 */
5807 5825 if (args->array_len == 0) {
5808 5826 resp->status = NFS4_OK;
5809 5827 return;
5810 5828 }
5811 5829
5812 5830 ASSERT(exi == NULL);
5813 5831 ASSERT(cr == NULL);
5814 5832
5815 5833 cr = crget();
5816 5834 ASSERT(cr != NULL);
5817 5835
/*
 * A zero return from sec_svc_getcred() is treated as a credential
 * failure: drop the cred, report bad credentials on the transport and
 * tell the caller through *rv.
 */
5818 5836 if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5819 5837 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5820 5838 &cs, COMPOUND4args *, args);
5821 5839 crfree(cr);
5822 5840 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5823 5841 &cs, COMPOUND4res *, resp);
5824 5842 svcerr_badcred(req->rq_xprt);
5825 5843 if (rv != NULL)
5826 5844 *rv = 1;
5827 5845 return;
5828 5846 }
/* One zeroed result slot per requested op; compacted below on early stop. */
5829 5847 resp->array_len = args->array_len;
5830 5848 resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5831 5849 KM_SLEEP);
5832 5850
5833 5851 cs.basecr = cr;
5834 5852 nsrv4 = nfs4_get_srv();
5835 5853
5836 5854 DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
5837 5855 COMPOUND4args *, args);
5838 5856
5839 5857 /*
5840 5858 * For now, NFS4 compound processing must be protected by
5841 5859 * exported_lock because it can access more than one exportinfo
5842 5860 * per compound and share/unshare can now change multiple
5843 5861 * exinfo structs. The NFS2/3 code only refs 1 exportinfo
5844 5862 * per proc (excluding public exinfo), and exi_count design
5845 5863 * is sufficient to protect concurrent execution of NFS2/3
5846 5864 * ops along with unexport. This lock will be removed as
5847 5865 * part of the NFSv4 phase 2 namespace redesign work.
5848 5866 */
5849 5867 rw_enter(&ne->exported_lock, RW_READER);
5850 5868
5851 5869 /*
5852 5870 * If this is the first compound we've seen, we need to start all
5853 5871 * new instances' grace periods.
5854 5872 */
5855 5873 if (nsrv4->seen_first_compound == 0) {
5856 5874 rfs4_grace_start_new(nsrv4);
5857 5875 /*
5858 5876 * This must be set after rfs4_grace_start_new(), otherwise
5859 5877 * another thread could proceed past here before the former
5860 5878 * is finished.
5861 5879 */
5862 5880 nsrv4->seen_first_compound = 1;
5863 5881 }
5864 5882
/* Run the ops in order until one fails (cs.cont is cleared) or we run out. */
5865 5883 for (i = 0; i < args->array_len && cs.cont; i++) {
5866 5884 nfs_argop4 *argop;
5867 5885 nfs_resop4 *resop;
5868 5886 uint_t op;
5869 5887
5870 5888 argop = &args->array[i];
5871 5889 resop = &resp->array[i];
5872 5890 resop->resop = argop->argop;
5873 5891 op = (uint_t)resop->resop;
5874 5892
5875 5893 if (op < rfsv4disp_cnt) {
5876 5894 /*
5877 5895 * Count the individual ops here; NULL and COMPOUND
5878 5896 * are counted in common_dispatch()
5879 5897 */
5880 5898 rfsproccnt_v4_ptr[op].value.ui64++;
5881 5899
5882 5900 NFS4_DEBUG(rfs4_debug > 1,
5883 5901 (CE_NOTE, "Executing %s", rfs4_op_string[op]));
5884 5902 (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5885 5903 NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
5886 5904 rfs4_op_string[op], *cs.statusp));
5887 5905 if (*cs.statusp != NFS4_OK)
5888 5906 cs.cont = FALSE;
5889 5907 } else {
5890 5908 /*
5891 5909 * This is effectively dead code since XDR code
5892 5910 * will have already returned BADXDR if op doesn't
5893 5911 * decode to legal value. This only done for a
5894 5912 * day when XDR code doesn't verify v4 opcodes.
5895 5913 */
5896 5914 op = OP_ILLEGAL;
5897 5915 rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
5898 5916
5899 5917 rfs4_op_illegal(argop, resop, req, &cs);
5900 5918 cs.cont = FALSE;
5901 5919 }
5902 5920
5903 5921 /*
5904 5922 * If not at last op, and if we are to stop, then
5905 5923 * compact the results array.
5906 5924 */
5907 5925 if ((i + 1) < args->array_len && !cs.cont) {
5908 5926 nfs_resop4 *new_res = kmem_alloc(
5909 5927 (i+1) * sizeof (nfs_resop4), KM_SLEEP);
5910 5928 bcopy(resp->array,
5911 5929 new_res, (i+1) * sizeof (nfs_resop4));
5912 5930 kmem_free(resp->array,
5913 5931 args->array_len * sizeof (nfs_resop4));
5914 5932
5915 5933 resp->array_len = i + 1;
5916 5934 resp->array = new_res;
5917 5935 }
5918 5936 }
5919 5937
5920 5938 rw_exit(&ne->exported_lock);
5921 5939
5922 5940 /*
5923 5941 * clear exportinfo and vnode fields from compound_state before dtrace
5924 5942 * probe, to avoid tracing residual values for path and share path.
5925 5943 */
5926 5944 if (cs.vp)
5927 5945 VN_RELE(cs.vp);
5928 5946 if (cs.saved_vp)
5929 5947 VN_RELE(cs.saved_vp);
5930 5948 cs.exi = cs.saved_exi = NULL;
5931 5949 cs.vp = cs.saved_vp = NULL;
5932 5950
5933 5951 DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
5934 5952 COMPOUND4res *, resp);
5935 5953
5936 5954 if (cs.saved_fh.nfs_fh4_val)
5937 5955 kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5938 5956
/* cs.basecr is the cred obtained with crget() above; cs.cr is released separately. */
5939 5957 if (cs.basecr)
5940 5958 crfree(cs.basecr);
5941 5959 if (cs.cr)
5942 5960 crfree(cs.cr);
5943 5961 /*
5944 5962 * done with this compound request, free the label
5945 5963 */
5946 5964
5947 5965 if (req->rq_label != NULL) {
5948 5966 kmem_free(req->rq_label, sizeof (bslabel_t));
5949 5967 req->rq_label = NULL;
5950 5968 }
5951 5969 }
5952 5970
5953 5971 /*
5954 5972 * XXX because of what appears to be duplicate calls to rfs4_compound_free
5955 5973 * XXX zero out the tag and array values. Need to investigate why the
5956 5974 * XXX calls occur, but at least prevent the panic for now.
5957 5975 */
5958 5976 void
5959 5977 rfs4_compound_free(COMPOUND4res *resp)
5960 5978 {
5961 5979 uint_t i;
5962 5980
5963 5981 if (resp->tag.utf8string_val) {
5964 5982 UTF8STRING_FREE(resp->tag)
5965 5983 }
5966 5984
5967 5985 for (i = 0; i < resp->array_len; i++) {
5968 5986 nfs_resop4 *resop;
5969 5987 uint_t op;
5970 5988
5971 5989 resop = &resp->array[i];
5972 5990 op = (uint_t)resop->resop;
5973 5991 if (op < rfsv4disp_cnt) {
5974 5992 (*rfsv4disptab[op].dis_resfree)(resop);
5975 5993 }
5976 5994 }
5977 5995 if (resp->array != NULL) {
5978 5996 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
5979 5997 }
5980 5998 }
5981 5999
5982 6000 /*
5983 6001 * Process the value of the compound request rpc flags, as a bit-AND
5984 6002 * of the individual per-op flags (idempotent, allowork, publicfh_ok)
5985 6003 */
5986 6004 void
5987 6005 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
5988 6006 {
5989 6007 int i;
5990 6008 int flag = RPC_ALL;
5991 6009
5992 6010 for (i = 0; flag && i < args->array_len; i++) {
5993 6011 uint_t op;
5994 6012
5995 6013 op = (uint_t)args->array[i].argop;
5996 6014
5997 6015 if (op < rfsv4disp_cnt)
5998 6016 flag &= rfsv4disptab[op].dis_flags;
5999 6017 else
6000 6018 flag = 0;
6001 6019 }
6002 6020 *flagp = flag;
6003 6021 }
6004 6022
6005 6023 nfsstat4
6006 6024 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
6007 6025 {
6008 6026 nfsstat4 e;
6009 6027
6010 6028 rfs4_dbe_lock(cp->rc_dbe);
6011 6029
6012 6030 if (cp->rc_sysidt != LM_NOSYSID) {
6013 6031 *sp = cp->rc_sysidt;
6014 6032 e = NFS4_OK;
6015 6033
6016 6034 } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
6017 6035 *sp = cp->rc_sysidt;
6018 6036 e = NFS4_OK;
6019 6037
6020 6038 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6021 6039 "rfs4_client_sysid: allocated 0x%x\n", *sp));
6022 6040 } else
6023 6041 e = NFS4ERR_DELAY;
6024 6042
6025 6043 rfs4_dbe_unlock(cp->rc_dbe);
6026 6044 return (e);
6027 6045 }
6028 6046
6029 6047 #if defined(DEBUG) && ! defined(lint)
/*
 * Debug helper: log a one-line description of a file-lock request.
 *
 * str       - caller-supplied prefix for the message
 * operation - fcntl command; F_GETLK, F_SETLK and F_SETLK_NBMAND are
 *             named, anything else prints as "F_UNKNOWN"
 * flk       - the lock being described; l_whence is asserted to be 0
 *
 * A zero l_len is printed as all ones (~0LL).
 */
6030 6048 static void lock_print(char *str, int operation, struct flock64 *flk)
6031 6049 {
6032 6050 char *op, *type;
6033 6051
6034 6052 switch (operation) {
6035 6053 case F_GETLK: op = "F_GETLK";
6036 6054 break;
6037 6055 case F_SETLK: op = "F_SETLK";
6038 6056 break;
6039 6057 case F_SETLK_NBMAND: op = "F_SETLK_NBMAND";
6040 6058 break;
6041 6059 default: op = "F_UNKNOWN";
6042 6060 break;
6043 6061 }
6044 6062 switch (flk->l_type) {
6045 6063 case F_UNLCK: type = "F_UNLCK";
6046 6064 break;
6047 6065 case F_RDLCK: type = "F_RDLCK";
6048 6066 break;
6049 6067 case F_WRLCK: type = "F_WRLCK";
6050 6068 break;
6051 6069 default: type = "F_UNKNOWN";
6052 6070 break;
6053 6071 }
6054 6072
6055 6073 ASSERT(flk->l_whence == 0);
6056 6074 cmn_err(CE_NOTE, "%s: %s, type = %s, off = %llx len = %llx pid = %d",
6057 6075 str, op, type, (longlong_t)flk->l_start,
6058 6076 flk->l_len ? (longlong_t)flk->l_len : ~0LL, flk->l_pid);
6059 6077 }
6060 6078
/*
 * LOCK_PRINT(d, s, t, f): call lock_print(s, t, f) when d is non-zero.
 * NOTE(review): this expands to a bare "if", so using it directly before
 * an "else" would capture that else -- use it as a standalone statement.
 * In non-DEBUG (or lint) builds it expands to nothing.
 */
6061 6079 #define LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f)
6062 6080 #else
6063 6081 #define LOCK_PRINT(d, s, t, f)
6064 6082 #endif
6065 6083
/*
 * Credential check hook.  Currently a stub: none of the arguments are
 * examined and every request is accepted (always returns TRUE).
 */
6066 6084 /*ARGSUSED*/
6067 6085 static bool_t
6068 6086 creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
6069 6087 {
6070 6088 return (TRUE);
6071 6089 }
6072 6090
6073 6091 /*
6074 6092 * Look up the pathname using the vp in cs as the directory vnode.
6075 6093 * cs->vp will be the vnode for the file on success
6076 6094 */
6077 6095
6078 6096 static nfsstat4
6079 6097 rfs4_lookup(component4 *component, struct svc_req *req,
6080 6098 struct compound_state *cs)
6081 6099 {
6082 6100 char *nm;
6083 6101 uint32_t len;
6084 6102 nfsstat4 status;
6085 6103 struct sockaddr *ca;
6086 6104 char *name;
6087 6105
6088 6106 if (cs->vp == NULL) {
6089 6107 return (NFS4ERR_NOFILEHANDLE);
6090 6108 }
6091 6109 if (cs->vp->v_type != VDIR) {
6092 6110 return (NFS4ERR_NOTDIR);
6093 6111 }
6094 6112
6095 6113 status = utf8_dir_verify(component);
6096 6114 if (status != NFS4_OK)
6097 6115 return (status);
6098 6116
6099 6117 nm = utf8_to_fn(component, &len, NULL);
6100 6118 if (nm == NULL) {
6101 6119 return (NFS4ERR_INVAL);
6102 6120 }
6103 6121
6104 6122 if (len > MAXNAMELEN) {
6105 6123 kmem_free(nm, len);
6106 6124 return (NFS4ERR_NAMETOOLONG);
6107 6125 }
6108 6126
6109 6127 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6110 6128 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6111 6129 MAXPATHLEN + 1);
6112 6130
6113 6131 if (name == NULL) {
6114 6132 kmem_free(nm, len);
6115 6133 return (NFS4ERR_INVAL);
6116 6134 }
6117 6135
6118 6136 status = do_rfs4_op_lookup(name, req, cs);
6119 6137
6120 6138 if (name != nm)
6121 6139 kmem_free(name, MAXPATHLEN + 1);
6122 6140
6123 6141 kmem_free(nm, len);
6124 6142
6125 6143 return (status);
6126 6144 }
6127 6145
6128 6146 static nfsstat4
6129 6147 rfs4_lookupfile(component4 *component, struct svc_req *req,
6130 6148 struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6131 6149 {
6132 6150 nfsstat4 status;
6133 6151 vnode_t *dvp = cs->vp;
6134 6152 vattr_t bva, ava, fva;
6135 6153 int error;
6136 6154
6137 6155 /* Get "before" change value */
6138 6156 bva.va_mask = AT_CTIME|AT_SEQ;
6139 6157 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6140 6158 if (error)
6141 6159 return (puterrno4(error));
6142 6160
6143 6161 /* rfs4_lookup may VN_RELE directory */
6144 6162 VN_HOLD(dvp);
6145 6163
6146 6164 status = rfs4_lookup(component, req, cs);
6147 6165 if (status != NFS4_OK) {
6148 6166 VN_RELE(dvp);
6149 6167 return (status);
6150 6168 }
6151 6169
6152 6170 /*
6153 6171 * Get "after" change value, if it fails, simply return the
6154 6172 * before value.
6155 6173 */
6156 6174 ava.va_mask = AT_CTIME|AT_SEQ;
6157 6175 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6158 6176 ava.va_ctime = bva.va_ctime;
6159 6177 ava.va_seq = 0;
6160 6178 }
6161 6179 VN_RELE(dvp);
6162 6180
6163 6181 /*
6164 6182 * Validate the file is a file
6165 6183 */
6166 6184 fva.va_mask = AT_TYPE|AT_MODE;
6167 6185 error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6168 6186 if (error)
6169 6187 return (puterrno4(error));
6170 6188
6171 6189 if (fva.va_type != VREG) {
6172 6190 if (fva.va_type == VDIR)
6173 6191 return (NFS4ERR_ISDIR);
6174 6192 if (fva.va_type == VLNK)
6175 6193 return (NFS4ERR_SYMLINK);
6176 6194 return (NFS4ERR_INVAL);
6177 6195 }
6178 6196
6179 6197 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6180 6198 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6181 6199
6182 6200 /*
6183 6201 * It is undefined if VOP_LOOKUP will change va_seq, so
6184 6202 * cinfo.atomic = TRUE only if we have
6185 6203 * non-zero va_seq's, and they have not changed.
6186 6204 */
6187 6205 if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6188 6206 cinfo->atomic = TRUE;
6189 6207 else
6190 6208 cinfo->atomic = FALSE;
6191 6209
6192 6210 /* Check for mandatory locking */
6193 6211 cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6194 6212 return (check_open_access(access, cs, req));
6195 6213 }
6196 6214
/*
 * Create the file "nm" in directory dvp for an OPEN with create.
 *
 * dvp     - directory vnode
 * nm      - name of the file to create
 * vap     - attributes for the new file; for the duplicate-request check
 *           va_mtime is compared with the existing file's mtime
 * mode    - createmode4 from the request (UNCHECKED4/GUARDED4/other)
 * cr      - credentials used for the VOP calls
 * vpp     - receives the created (or already existing) vnode on success
 * created - set to TRUE when the file was created here, or when an
 *           existing file is judged to be a retransmitted create;
 *           FALSE when an existing file is returned for UNCHECKED4
 *
 * The create is always attempted with EXCL.  If it fails with EEXIST and
 * the mode is not GUARDED4, the existing object is looked up instead; if
 * that lookup reports ENOENT the create is retried from the top.
 * On the error returns below that follow a successful lookup, *vpp has
 * already been released with VN_RELE().
 */
6197 6215 static nfsstat4
6198 6216 create_vnode(vnode_t *dvp, char *nm, vattr_t *vap, createmode4 mode,
6199 6217 cred_t *cr, vnode_t **vpp, bool_t *created)
6200 6218 {
6201 6219 int error;
6202 6220 nfsstat4 status = NFS4_OK;
6203 6221 vattr_t va;
6204 6222
6205 6223 tryagain:
6206 6224
6207 6225 /*
6208 6226 * The file open mode used is VWRITE. If the client needs
6209 6227 * some other semantic, then it should do the access checking
6210 6228 * itself. It would have been nice to have the file open mode
6211 6229 * passed as part of the arguments.
6212 6230 */
6213 6231
6214 6232 *created = TRUE;
6215 6233 error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6216 6234
6217 6235 if (error) {
6218 6236 *created = FALSE;
6219 6237
6220 6238 /*
6221 6239 * If we got something other than file already exists
6222 6240 * then just return this error. Otherwise, we got
6223 6241 * EEXIST. If we were doing a GUARDED create, then
6224 6242 * just return this error. Otherwise, we need to
6225 6243 * make sure that this wasn't a duplicate of an
6226 6244 * exclusive create request.
6227 6245 *
6228 6246 * The assumption is made that a non-exclusive create
6229 6247 * request will never return EEXIST.
6230 6248 */
6231 6249
6232 6250 if (error != EEXIST || mode == GUARDED4) {
6233 6251 status = puterrno4(error);
6234 6252 return (status);
6235 6253 }
6236 6254 error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6237 6255 NULL, NULL, NULL);
6238 6256
6239 6257 if (error) {
6240 6258 /*
6241 6259 * We couldn't find the file that we thought that
6242 6260 * we just created. So, we'll just try creating
6243 6261 * it again.
6244 6262 */
6245 6263 if (error == ENOENT)
6246 6264 goto tryagain;
6247 6265
6248 6266 status = puterrno4(error);
6249 6267 return (status);
6250 6268 }
6251 6269
6252 6270 if (mode == UNCHECKED4) {
6253 6271 /* existing object must be regular file */
6254 6272 if ((*vpp)->v_type != VREG) {
6255 6273 if ((*vpp)->v_type == VDIR)
6256 6274 status = NFS4ERR_ISDIR;
6257 6275 else if ((*vpp)->v_type == VLNK)
6258 6276 status = NFS4ERR_SYMLINK;
6259 6277 else
6260 6278 status = NFS4ERR_INVAL;
6261 6279 VN_RELE(*vpp);
6262 6280 return (status);
6263 6281 }
6264 6282
6265 6283 return (NFS4_OK);
6266 6284 }
6267 6285
6268 6286 /* Check for duplicate request */
6269 6287 va.va_mask = AT_MTIME;
6270 6288 error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6271 6289 if (!error) {
6272 6290 /* We found the file */
6273 6291 const timestruc_t *mtime = &vap->va_mtime;
6274 6292
6275 6293 if (va.va_mtime.tv_sec != mtime->tv_sec ||
6276 6294 va.va_mtime.tv_nsec != mtime->tv_nsec) {
6277 6295 /* but its not our creation */
6278 6296 VN_RELE(*vpp);
6279 6297 return (NFS4ERR_EXIST);
6280 6298 }
6281 6299 *created = TRUE; /* retrans of create == created */
6282 6300 return (NFS4_OK);
6283 6301 }
6284 6302 VN_RELE(*vpp);
6285 6303 return (NFS4ERR_EXIST);
6286 6304 }
6287 6305
6288 6306 return (NFS4_OK);
6289 6307 }
6290 6308
6291 6309 static nfsstat4
6292 6310 check_open_access(uint32_t access, struct compound_state *cs,
6293 6311 struct svc_req *req)
6294 6312 {
6295 6313 int error;
6296 6314 vnode_t *vp;
6297 6315 bool_t readonly;
6298 6316 cred_t *cr = cs->cr;
6299 6317
6300 6318 /* For now we don't allow mandatory locking as per V2/V3 */
6301 6319 if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6302 6320 return (NFS4ERR_ACCESS);
6303 6321 }
6304 6322
6305 6323 vp = cs->vp;
6306 6324 ASSERT(cr != NULL && vp->v_type == VREG);
6307 6325
6308 6326 /*
6309 6327 * If the file system is exported read only and we are trying
6310 6328 * to open for write, then return NFS4ERR_ROFS
6311 6329 */
6312 6330
6313 6331 readonly = rdonly4(req, cs);
6314 6332
6315 6333 if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6316 6334 return (NFS4ERR_ROFS);
6317 6335
6318 6336 if (access & OPEN4_SHARE_ACCESS_READ) {
6319 6337 if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6320 6338 (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6321 6339 return (NFS4ERR_ACCESS);
6322 6340 }
6323 6341 }
6324 6342
6325 6343 if (access & OPEN4_SHARE_ACCESS_WRITE) {
6326 6344 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6327 6345 if (error)
6328 6346 return (NFS4ERR_ACCESS);
6329 6347 }
6330 6348
6331 6349 return (NFS4_OK);
6332 6350 }
6333 6351
6334 6352 static nfsstat4
6335 6353 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6336 6354 change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6337 6355 {
6338 6356 struct nfs4_svgetit_arg sarg;
6339 6357 struct nfs4_ntov_table ntov;
6340 6358
6341 6359 bool_t ntov_table_init = FALSE;
6342 6360 struct statvfs64 sb;
6343 6361 nfsstat4 status;
6344 6362 vnode_t *vp;
6345 6363 vattr_t bva, ava, iva, cva, *vap;
6346 6364 vnode_t *dvp;
6347 6365 timespec32_t *mtime;
6348 6366 char *nm = NULL;
6349 6367 uint_t buflen;
6350 6368 bool_t created;
6351 6369 bool_t setsize = FALSE;
6352 6370 len_t reqsize;
6353 6371 int error;
6354 6372 bool_t trunc;
6355 6373 caller_context_t ct;
6356 6374 component4 *component;
6357 6375 bslabel_t *clabel;
6358 6376 struct sockaddr *ca;
6359 6377 char *name = NULL;
6360 6378
6361 6379 sarg.sbp = &sb;
6362 6380 sarg.is_referral = B_FALSE;
6363 6381
6364 6382 dvp = cs->vp;
6365 6383
6366 6384 /* Check if the file system is read only */
6367 6385 if (rdonly4(req, cs))
6368 6386 return (NFS4ERR_ROFS);
6369 6387
6370 6388 /* check the label of including directory */
6371 6389 if (is_system_labeled()) {
6372 6390 ASSERT(req->rq_label != NULL);
6373 6391 clabel = req->rq_label;
6374 6392 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6375 6393 "got client label from request(1)",
6376 6394 struct svc_req *, req);
6377 6395 if (!blequal(&l_admin_low->tsl_label, clabel)) {
6378 6396 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6379 6397 cs->exi)) {
6380 6398 return (NFS4ERR_ACCESS);
6381 6399 }
6382 6400 }
6383 6401 }
6384 6402
6385 6403 /*
6386 6404 * Get the last component of path name in nm. cs will reference
6387 6405 * the including directory on success.
6388 6406 */
6389 6407 component = &args->open_claim4_u.file;
6390 6408 status = utf8_dir_verify(component);
6391 6409 if (status != NFS4_OK)
6392 6410 return (status);
6393 6411
6394 6412 nm = utf8_to_fn(component, &buflen, NULL);
6395 6413
6396 6414 if (nm == NULL)
6397 6415 return (NFS4ERR_RESOURCE);
6398 6416
6399 6417 if (buflen > MAXNAMELEN) {
6400 6418 kmem_free(nm, buflen);
6401 6419 return (NFS4ERR_NAMETOOLONG);
6402 6420 }
6403 6421
6404 6422 bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6405 6423 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6406 6424 if (error) {
6407 6425 kmem_free(nm, buflen);
6408 6426 return (puterrno4(error));
6409 6427 }
6410 6428
6411 6429 if (bva.va_type != VDIR) {
6412 6430 kmem_free(nm, buflen);
6413 6431 return (NFS4ERR_NOTDIR);
6414 6432 }
6415 6433
6416 6434 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6417 6435
6418 6436 switch (args->mode) {
6419 6437 case GUARDED4:
6420 6438 /*FALLTHROUGH*/
6421 6439 case UNCHECKED4:
6422 6440 nfs4_ntov_table_init(&ntov);
6423 6441 ntov_table_init = TRUE;
6424 6442
6425 6443 *attrset = 0;
6426 6444 status = do_rfs4_set_attrs(attrset,
6427 6445 &args->createhow4_u.createattrs,
6428 6446 cs, &sarg, &ntov, NFS4ATTR_SETIT);
6429 6447
6430 6448 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6431 6449 sarg.vap->va_type != VREG) {
6432 6450 if (sarg.vap->va_type == VDIR)
6433 6451 status = NFS4ERR_ISDIR;
6434 6452 else if (sarg.vap->va_type == VLNK)
6435 6453 status = NFS4ERR_SYMLINK;
6436 6454 else
6437 6455 status = NFS4ERR_INVAL;
6438 6456 }
6439 6457
6440 6458 if (status != NFS4_OK) {
6441 6459 kmem_free(nm, buflen);
6442 6460 nfs4_ntov_table_free(&ntov, &sarg);
6443 6461 *attrset = 0;
6444 6462 return (status);
6445 6463 }
6446 6464
6447 6465 vap = sarg.vap;
6448 6466 vap->va_type = VREG;
6449 6467 vap->va_mask |= AT_TYPE;
6450 6468
6451 6469 if ((vap->va_mask & AT_MODE) == 0) {
6452 6470 vap->va_mask |= AT_MODE;
6453 6471 vap->va_mode = (mode_t)0600;
6454 6472 }
6455 6473
6456 6474 if (vap->va_mask & AT_SIZE) {
6457 6475
6458 6476 /* Disallow create with a non-zero size */
6459 6477
6460 6478 if ((reqsize = sarg.vap->va_size) != 0) {
6461 6479 kmem_free(nm, buflen);
6462 6480 nfs4_ntov_table_free(&ntov, &sarg);
6463 6481 *attrset = 0;
6464 6482 return (NFS4ERR_INVAL);
6465 6483 }
6466 6484 setsize = TRUE;
6467 6485 }
6468 6486 break;
6469 6487
6470 6488 case EXCLUSIVE4:
6471 6489 /* prohibit EXCL create of named attributes */
6472 6490 if (dvp->v_flag & V_XATTRDIR) {
6473 6491 kmem_free(nm, buflen);
6474 6492 *attrset = 0;
6475 6493 return (NFS4ERR_INVAL);
6476 6494 }
6477 6495
6478 6496 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6479 6497 cva.va_type = VREG;
6480 6498 /*
6481 6499 * Ensure no time overflows. Assumes underlying
6482 6500 * filesystem supports at least 32 bits.
6483 6501 * Truncate nsec to usec resolution to allow valid
6484 6502 * compares even if the underlying filesystem truncates.
6485 6503 */
6486 6504 mtime = (timespec32_t *)&args->createhow4_u.createverf;
6487 6505 cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6488 6506 cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6489 6507 cva.va_mode = (mode_t)0;
6490 6508 vap = &cva;
6491 6509
6492 6510 /*
6493 6511 * For EXCL create, attrset is set to the server attr
6494 6512 * used to cache the client's verifier.
6495 6513 */
6496 6514 *attrset = FATTR4_TIME_MODIFY_MASK;
6497 6515 break;
6498 6516 }
6499 6517
6500 6518 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6501 6519 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6502 6520 MAXPATHLEN + 1);
6503 6521
6504 6522 if (name == NULL) {
6505 6523 kmem_free(nm, buflen);
6506 6524 return (NFS4ERR_SERVERFAULT);
6507 6525 }
6508 6526
6509 6527 status = create_vnode(dvp, name, vap, args->mode,
6510 6528 cs->cr, &vp, &created);
6511 6529 if (nm != name)
6512 6530 kmem_free(name, MAXPATHLEN + 1);
6513 6531 kmem_free(nm, buflen);
6514 6532
6515 6533 if (status != NFS4_OK) {
6516 6534 if (ntov_table_init)
6517 6535 nfs4_ntov_table_free(&ntov, &sarg);
6518 6536 *attrset = 0;
6519 6537 return (status);
6520 6538 }
6521 6539
6522 6540 trunc = (setsize && !created);
6523 6541
6524 6542 if (args->mode != EXCLUSIVE4) {
6525 6543 bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6526 6544
6527 6545 /*
6528 6546 * True verification that object was created with correct
6529 6547 * attrs is impossible. The attrs could have been changed
6530 6548 * immediately after object creation. If attributes did
6531 6549 * not verify, the only recourse for the server is to
6532 6550 * destroy the object. Maybe if some attrs (like gid)
6533 6551 * are set incorrectly, the object should be destroyed;
6534 6552 * however, seems bad as a default policy. Do we really
6535 6553 * want to destroy an object over one of the times not
6536 6554 * verifying correctly? For these reasons, the server
6537 6555 * currently sets bits in attrset for createattrs
6538 6556 * that were set; however, no verification is done.
6539 6557 *
6540 6558 * vmask_to_nmask accounts for vattr bits set on create
6541 6559 * [do_rfs4_set_attrs() only sets resp bits for
6542 6560 * non-vattr/vfs bits.]
6543 6561 * Mask off any bits we set by default so as not to return
6544 6562 * more attrset bits than were requested in createattrs
6545 6563 */
6546 6564 if (created) {
6547 6565 nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6548 6566 *attrset &= createmask;
6549 6567 } else {
6550 6568 /*
6551 6569 * We did not create the vnode (we tried but it
6552 6570 * already existed). In this case, the only createattr
6553 6571 * that the spec allows the server to set is size,
6554 6572 * and even then, it can only be set if it is 0.
6555 6573 */
6556 6574 *attrset = 0;
6557 6575 if (trunc)
6558 6576 *attrset = FATTR4_SIZE_MASK;
6559 6577 }
6560 6578 }
6561 6579 if (ntov_table_init)
6562 6580 nfs4_ntov_table_free(&ntov, &sarg);
6563 6581
6564 6582 /*
6565 6583 * Get the initial "after" sequence number, if it fails,
6566 6584 * set to zero, time to before.
6567 6585 */
6568 6586 iva.va_mask = AT_CTIME|AT_SEQ;
6569 6587 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6570 6588 iva.va_seq = 0;
6571 6589 iva.va_ctime = bva.va_ctime;
6572 6590 }
6573 6591
6574 6592 /*
6575 6593 * create_vnode attempts to create the file exclusive,
6576 6594 * if it already exists the VOP_CREATE will fail and
6577 6595 * may not increase va_seq. It is atomic if
6578 6596 * we haven't changed the directory, but if it has changed
6579 6597 * we don't know what changed it.
6580 6598 */
6581 6599 if (!created) {
6582 6600 if (bva.va_seq && iva.va_seq &&
6583 6601 bva.va_seq == iva.va_seq)
6584 6602 cinfo->atomic = TRUE;
6585 6603 else
6586 6604 cinfo->atomic = FALSE;
6587 6605 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6588 6606 } else {
6589 6607 /*
6590 6608 * The entry was created, we need to sync the
6591 6609 * directory metadata.
6592 6610 */
6593 6611 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6594 6612
6595 6613 /*
6596 6614 * Get "after" change value, if it fails, simply return the
6597 6615 * before value.
6598 6616 */
6599 6617 ava.va_mask = AT_CTIME|AT_SEQ;
6600 6618 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6601 6619 ava.va_ctime = bva.va_ctime;
6602 6620 ava.va_seq = 0;
6603 6621 }
6604 6622
6605 6623 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6606 6624
6607 6625 /*
6608 6626 * The cinfo->atomic = TRUE only if we have
6609 6627 * non-zero va_seq's, and it has incremented by exactly one
6610 6628 * during the create_vnode and it didn't
6611 6629 * change during the VOP_FSYNC.
6612 6630 */
6613 6631 if (bva.va_seq && iva.va_seq && ava.va_seq &&
6614 6632 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6615 6633 cinfo->atomic = TRUE;
6616 6634 else
6617 6635 cinfo->atomic = FALSE;
6618 6636 }
6619 6637
6620 6638 /* Check for mandatory locking and that the size gets set. */
6621 6639 cva.va_mask = AT_MODE;
6622 6640 if (setsize)
6623 6641 cva.va_mask |= AT_SIZE;
6624 6642
6625 6643 /* Assume the worst */
6626 6644 cs->mandlock = TRUE;
6627 6645
6628 6646 if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6629 6647 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6630 6648
6631 6649 /*
6632 6650 * Truncate the file if necessary; this would be
6633 6651 * the case for create over an existing file.
6634 6652 */
6635 6653
6636 6654 if (trunc) {
6637 6655 int in_crit = 0;
6638 6656 rfs4_file_t *fp;
6639 6657 nfs4_srv_t *nsrv4;
6640 6658 bool_t create = FALSE;
6641 6659
6642 6660 /*
6643 6661 * We are writing over an existing file.
6644 6662 * Check to see if we need to recall a delegation.
6645 6663 */
6646 6664 nsrv4 = nfs4_get_srv();
6647 6665 rfs4_hold_deleg_policy(nsrv4);
6648 6666 if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6649 6667 if (rfs4_check_delegated_byfp(FWRITE, fp,
6650 6668 (reqsize == 0), FALSE, FALSE, &clientid)) {
6651 6669 rfs4_file_rele(fp);
6652 6670 rfs4_rele_deleg_policy(nsrv4);
6653 6671 VN_RELE(vp);
6654 6672 *attrset = 0;
6655 6673 return (NFS4ERR_DELAY);
6656 6674 }
6657 6675 rfs4_file_rele(fp);
6658 6676 }
6659 6677 rfs4_rele_deleg_policy(nsrv4);
6660 6678
6661 6679 if (nbl_need_check(vp)) {
6662 6680 in_crit = 1;
6663 6681
6664 6682 ASSERT(reqsize == 0);
6665 6683
6666 6684 nbl_start_crit(vp, RW_READER);
6667 6685 if (nbl_conflict(vp, NBL_WRITE, 0,
6668 6686 cva.va_size, 0, NULL)) {
6669 6687 in_crit = 0;
6670 6688 nbl_end_crit(vp);
6671 6689 VN_RELE(vp);
6672 6690 *attrset = 0;
6673 6691 return (NFS4ERR_ACCESS);
6674 6692 }
6675 6693 }
6676 6694 ct.cc_sysid = 0;
6677 6695 ct.cc_pid = 0;
6678 6696 ct.cc_caller_id = nfs4_srv_caller_id;
6679 6697 ct.cc_flags = CC_DONTBLOCK;
6680 6698
6681 6699 cva.va_mask = AT_SIZE;
6682 6700 cva.va_size = reqsize;
6683 6701 (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6684 6702 if (in_crit)
6685 6703 nbl_end_crit(vp);
6686 6704 }
6687 6705 }
6688 6706
6689 6707 error = makefh4(&cs->fh, vp, cs->exi);
6690 6708
6691 6709 /*
6692 6710 * Force modified data and metadata out to stable storage.
6693 6711 */
6694 6712 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6695 6713
6696 6714 if (error) {
6697 6715 VN_RELE(vp);
6698 6716 *attrset = 0;
6699 6717 return (puterrno4(error));
6700 6718 }
6701 6719
6702 6720 /* if parent dir is attrdir, set namedattr fh flag */
6703 6721 if (dvp->v_flag & V_XATTRDIR)
6704 6722 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6705 6723
6706 6724 if (cs->vp)
6707 6725 VN_RELE(cs->vp);
6708 6726
6709 6727 cs->vp = vp;
6710 6728
6711 6729 /*
6712 6730 * if we did not create the file, we will need to check
6713 6731 * the access bits on the file
6714 6732 */
6715 6733
6716 6734 if (!created) {
6717 6735 if (setsize)
6718 6736 args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6719 6737 status = check_open_access(args->share_access, cs, req);
6720 6738 if (status != NFS4_OK)
6721 6739 *attrset = 0;
6722 6740 }
6723 6741 return (status);
6724 6742 }
6725 6743
/*
 * rfs4_do_open: common open back end, called by rfs4_do_opennull(),
 * rfs4_do_openprev() and rfs4_do_opendelcur().
 *
 * Looks up (or creates) the rfs4_file_t for cs->vp and the open state for
 * the (oo, file) pair, applies the requested share access/deny modes via
 * rfs4_share(), recalls a delegation when rfs4_check_recall() asks for it,
 * calls VOP_OPEN() or vn_open_upgrade(), updates the per-state and per-file
 * share bookkeeping, and asks rfs4_grant_delegation() for a delegation.
 * The outcome is reported only through resp->status (plus resp->stateid
 * and resp->delegation on success); every failure path undoes the share
 * reservation it added and "closes" a state that was created by this call.
 *
 *	cs		compound state; cs->vp and cs->fh name the file
 *	req		RPC request (currently unused)
 *	oo		open owner performing the open
 *	deleg		request type handed to rfs4_grant_delegation()
 *	access, deny	requested OPEN4_SHARE_ACCESS_* / OPEN4_SHARE_DENY_* bits
 *	resp		OPEN result to fill in
 *	deleg_cur	non-zero: never call VOP_OPEN(), only upgrade the open
 *
 * NOTE(review): fp->rf_file_rwlock is taken by rfs4_findfile_withlock() and
 * is explicitly dropped here only on the sp == NULL path; presumably it is
 * released inside rfs4_findstate_by_owner_file() otherwise -- confirm.
 */
6726 6744 /*ARGSUSED*/
6727 6745 static void
6728 6746 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
6729 6747 rfs4_openowner_t *oo, delegreq_t deleg,
6730 6748 uint32_t access, uint32_t deny,
6731 6749 OPEN4res *resp, int deleg_cur)
6732 6750 {
6733 6751 /* XXX Currently not using req */
6734 6752 rfs4_state_t *sp;
6735 6753 rfs4_file_t *fp;
6736 6754 bool_t screate = TRUE;
6737 6755 bool_t fcreate = TRUE;
6738 6756 uint32_t open_a, share_a;
6739 6757 uint32_t open_d, share_d;
6740 6758 rfs4_deleg_state_t *dsp;
6741 6759 sysid_t sysid;
6742 6760 nfsstat4 status;
6743 6761 caller_context_t ct;
6744 6762 int fflags = 0;
6745 6763 int recall = 0;
6746 6764 int err;
6747 6765 int first_open;
6748 6766
6749 6767 /* get the file struct and hold a lock on it during initial open */
6750 6768 fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
6751 6769 if (fp == NULL) {
6752 6770 resp->status = NFS4ERR_RESOURCE;
6753 6771 DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
6754 6772 return;
6755 6773 }
6756 6774
6757 6775 sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
6758 6776 if (sp == NULL) {
6759 6777 resp->status = NFS4ERR_RESOURCE;
6760 6778 DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
6761 6779 /* No need to keep any reference */
6762 6780 rw_exit(&fp->rf_file_rwlock);
6763 6781 rfs4_file_rele(fp);
6764 6782 return;
6765 6783 }
6766 6784
6767 6785 /* try to get the sysid before continuing */
6768 6786 if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
6769 6787 resp->status = status;
6770 6788 rfs4_file_rele(fp);
6771 6789 /* Not a fully formed open; "close" it */
6772 6790 if (screate == TRUE)
6773 6791 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6774 6792 rfs4_state_rele(sp);
6775 6793 return;
6776 6794 }
6777 6795
6778 6796 /* Calculate the fflags for this OPEN. */
6779 6797 if (access & OPEN4_SHARE_ACCESS_READ)
6780 6798 fflags |= FREAD;
6781 6799 if (access & OPEN4_SHARE_ACCESS_WRITE)
6782 6800 fflags |= FWRITE;
6783 6801
6784 6802 rfs4_dbe_lock(sp->rs_dbe);
6785 6803
6786 6804 /*
6787 6805 * Calculate the new deny and access mode that this open is adding to
6788 6806 * the file for this open owner;
6789 6807 */
6790 6808 open_d = (deny & ~sp->rs_open_deny);
6791 6809 open_a = (access & ~sp->rs_open_access);
6792 6810
6793 6811 /*
6794 6812 * Calculate the new share access and share deny modes that this open
6795 6813 * is adding to the file for this open owner;
6796 6814 */
6797 6815 share_a = (access & ~sp->rs_share_access);
6798 6816 share_d = (deny & ~sp->rs_share_deny);
6799 6817
/* first_open: this state has no READ/WRITE open access recorded yet */
6800 6818 first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
6801 6819
6802 6820 /*
6803 6821 * Check to see the client has already sent an open for this
6804 6822 * open owner on this file with the same share/deny modes.
6805 6823 * If so, we don't need to check for a conflict and we don't
6806 6824 * need to add another shrlock. If not, then we need to
6807 6825 * check for conflicts in deny and access before checking for
6808 6826 * conflicts in delegation. We don't want to recall a
6809 6827 * delegation based on an open that will eventually fail based
6810 6828 * on shares modes.
6811 6829 */
6812 6830
6813 6831 if (share_a || share_d) {
6814 6832 if ((err = rfs4_share(sp, access, deny)) != 0) {
6815 6833 rfs4_dbe_unlock(sp->rs_dbe);
6816 6834 resp->status = err;
6817 6835
6818 6836 rfs4_file_rele(fp);
6819 6837 /* Not a fully formed open; "close" it */
6820 6838 if (screate == TRUE)
6821 6839 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6822 6840 rfs4_state_rele(sp);
6823 6841 return;
6824 6842 }
6825 6843 }
6826 6844
6827 6845 rfs4_dbe_lock(fp->rf_dbe);
6828 6846
6829 6847 /*
6830 6848 * Check to see if this file is delegated and if so, if a
6831 6849 * recall needs to be done.
6832 6850 */
6833 6851 if (rfs4_check_recall(sp, access)) {
/*
 * Both dbe locks are dropped across the recall and the delay; the
 * state lock is retaken first so rs_closed can be rechecked.
 */
6834 6852 rfs4_dbe_unlock(fp->rf_dbe);
6835 6853 rfs4_dbe_unlock(sp->rs_dbe);
6836 6854 rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
6837 6855 delay(NFS4_DELEGATION_CONFLICT_DELAY);
6838 6856 rfs4_dbe_lock(sp->rs_dbe);
6839 6857
6840 6858 /* if state closed while lock was dropped */
6841 6859 if (sp->rs_closed) {
6842 6860 if (share_a || share_d)
6843 6861 (void) rfs4_unshare(sp);
6844 6862 rfs4_dbe_unlock(sp->rs_dbe);
6845 6863 rfs4_file_rele(fp);
6846 6864 /* Not a fully formed open; "close" it */
6847 6865 if (screate == TRUE)
6848 6866 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6849 6867 rfs4_state_rele(sp);
6850 6868 resp->status = NFS4ERR_OLD_STATEID;
6851 6869 return;
6852 6870 }
6853 6871
6854 6872 rfs4_dbe_lock(fp->rf_dbe);
6855 6873 /* Let's see if the delegation was returned */
6856 6874 if (rfs4_check_recall(sp, access)) {
6857 6875 rfs4_dbe_unlock(fp->rf_dbe);
6858 6876 if (share_a || share_d)
6859 6877 (void) rfs4_unshare(sp);
6860 6878 rfs4_dbe_unlock(sp->rs_dbe);
6861 6879 rfs4_file_rele(fp);
6862 6880 rfs4_update_lease(sp->rs_owner->ro_client);
6863 6881
6864 6882 /* Not a fully formed open; "close" it */
6865 6883 if (screate == TRUE)
6866 6884 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6867 6885 rfs4_state_rele(sp);
6868 6886 resp->status = NFS4ERR_DELAY;
6869 6887 return;
6870 6888 }
6871 6889 }
6872 6890 /*
6873 6891 * the share check passed and any delegation conflict has been
6874 6892 * taken care of, now call vop_open.
6875 6893 * if this is the first open then call vop_open with fflags.
6876 6894 * if not, call vn_open_upgrade with just the upgrade flags.
6877 6895 *
6878 6896 * if the file has been opened already, it will have the current
6879 6897 * access mode in the state struct. if it has no share access, then
6880 6898 * this is a new open.
6881 6899 *
6882 6900 * However, if this is open with CLAIM_DELEGATE_CUR, then don't
6883 6901 * call VOP_OPEN(), just do the open upgrade.
6884 6902 */
6885 6903 if (first_open && !deleg_cur) {
6886 6904 ct.cc_sysid = sysid;
6887 6905 ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
6888 6906 ct.cc_caller_id = nfs4_srv_caller_id;
6889 6907 ct.cc_flags = CC_DONTBLOCK;
6890 6908 err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
6891 6909 if (err) {
6892 6910 rfs4_dbe_unlock(fp->rf_dbe);
6893 6911 if (share_a || share_d)
6894 6912 (void) rfs4_unshare(sp);
6895 6913 rfs4_dbe_unlock(sp->rs_dbe);
6896 6914 rfs4_file_rele(fp);
6897 6915
6898 6916 /* Not a fully formed open; "close" it */
6899 6917 if (screate == TRUE)
6900 6918 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6901 6919 rfs4_state_rele(sp);
6902 6920 /* check if a monitor detected a delegation conflict */
6903 6921 if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
6904 6922 resp->status = NFS4ERR_DELAY;
6905 6923 else
6906 6924 resp->status = NFS4ERR_SERVERFAULT;
6907 6925 return;
6908 6926 }
6909 6927 } else { /* open upgrade */
6910 6928 /*
6911 6929 * calculate the fflags for the new mode that is being added
6912 6930 * by this upgrade.
6913 6931 */
6914 6932 fflags = 0;
6915 6933 if (open_a & OPEN4_SHARE_ACCESS_READ)
6916 6934 fflags |= FREAD;
6917 6935 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6918 6936 fflags |= FWRITE;
6919 6937 vn_open_upgrade(cs->vp, fflags);
6920 6938 }
/* Record the new modes on the state and count the newly added bits on the file */
6921 6939 sp->rs_open_access |= access;
6922 6940 sp->rs_open_deny |= deny;
6923 6941
6924 6942 if (open_d & OPEN4_SHARE_DENY_READ)
6925 6943 fp->rf_deny_read++;
6926 6944 if (open_d & OPEN4_SHARE_DENY_WRITE)
6927 6945 fp->rf_deny_write++;
6928 6946 fp->rf_share_deny |= deny;
6929 6947
6930 6948 if (open_a & OPEN4_SHARE_ACCESS_READ)
6931 6949 fp->rf_access_read++;
6932 6950 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6933 6951 fp->rf_access_write++;
6934 6952 fp->rf_share_access |= access;
6935 6953
6936 6954 /*
6937 6955 * Check for delegation here. if the deleg argument is not
6938 6956 * DELEG_ANY, then this is a reclaim from a client and
6939 6957 * we must honor the delegation requested. If necessary we can
6940 6958 * set the recall flag.
6941 6959 */
6942 6960
6943 6961 dsp = rfs4_grant_delegation(deleg, sp, &recall);
6944 6962
6945 6963 cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
6946 6964
6947 6965 next_stateid(&sp->rs_stateid);
6948 6966
6949 6967 resp->stateid = sp->rs_stateid.stateid;
6950 6968
6951 6969 rfs4_dbe_unlock(fp->rf_dbe);
6952 6970 rfs4_dbe_unlock(sp->rs_dbe);
6953 6971
6954 6972 if (dsp) {
6955 6973 rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
6956 6974 rfs4_deleg_state_rele(dsp);
6957 6975 }
6958 6976
6959 6977 rfs4_file_rele(fp);
6960 6978 rfs4_state_rele(sp);
6961 6979
6962 6980 resp->status = NFS4_OK;
6963 6981 }
6964 6982
6965 6983 /*ARGSUSED*/
6966 6984 static void
6967 6985 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6968 6986 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6969 6987 {
6970 6988 change_info4 *cinfo = &resp->cinfo;
6971 6989 bitmap4 *attrset = &resp->attrset;
6972 6990
6973 6991 if (args->opentype == OPEN4_NOCREATE)
6974 6992 resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
6975 6993 req, cs, args->share_access, cinfo);
6976 6994 else {
6977 6995 /* inhibit delegation grants during exclusive create */
6978 6996
6979 6997 if (args->mode == EXCLUSIVE4)
6980 6998 rfs4_disable_delegation();
6981 6999
6982 7000 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
6983 7001 oo->ro_client->rc_clientid);
6984 7002 }
6985 7003
6986 7004 if (resp->status == NFS4_OK) {
6987 7005
6988 7006 /* cs->vp cs->fh now reference the desired file */
6989 7007
6990 7008 rfs4_do_open(cs, req, oo,
6991 7009 oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
6992 7010 args->share_access, args->share_deny, resp, 0);
6993 7011
6994 7012 /*
6995 7013 * If rfs4_createfile set attrset, we must
6996 7014 * clear this attrset before the response is copied.
6997 7015 */
6998 7016 if (resp->status != NFS4_OK && resp->attrset) {
6999 7017 resp->attrset = 0;
7000 7018 }
7001 7019 }
7002 7020 else
7003 7021 *cs->statusp = resp->status;
7004 7022
7005 7023 if (args->mode == EXCLUSIVE4)
7006 7024 rfs4_enable_delegation();
7007 7025 }
7008 7026
7009 7027 /*ARGSUSED*/
7010 7028 static void
7011 7029 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
7012 7030 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7013 7031 {
7014 7032 change_info4 *cinfo = &resp->cinfo;
7015 7033 vattr_t va;
7016 7034 vtype_t v_type = cs->vp->v_type;
7017 7035 int error = 0;
7018 7036
7019 7037 /* Verify that we have a regular file */
7020 7038 if (v_type != VREG) {
7021 7039 if (v_type == VDIR)
7022 7040 resp->status = NFS4ERR_ISDIR;
7023 7041 else if (v_type == VLNK)
7024 7042 resp->status = NFS4ERR_SYMLINK;
7025 7043 else
7026 7044 resp->status = NFS4ERR_INVAL;
7027 7045 return;
7028 7046 }
7029 7047
7030 7048 va.va_mask = AT_MODE|AT_UID;
7031 7049 error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
7032 7050 if (error) {
7033 7051 resp->status = puterrno4(error);
7034 7052 return;
7035 7053 }
7036 7054
7037 7055 cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7038 7056
7039 7057 /*
7040 7058 * Check if we have access to the file, Note the the file
7041 7059 * could have originally been open UNCHECKED or GUARDED
7042 7060 * with mode bits that will now fail, but there is nothing
7043 7061 * we can really do about that except in the case that the
7044 7062 * owner of the file is the one requesting the open.
7045 7063 */
7046 7064 if (crgetuid(cs->cr) != va.va_uid) {
7047 7065 resp->status = check_open_access(args->share_access, cs, req);
7048 7066 if (resp->status != NFS4_OK) {
7049 7067 return;
7050 7068 }
7051 7069 }
7052 7070
7053 7071 /*
7054 7072 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7055 7073 */
7056 7074 cinfo->before = 0;
7057 7075 cinfo->after = 0;
7058 7076 cinfo->atomic = FALSE;
7059 7077
7060 7078 rfs4_do_open(cs, req, oo,
7061 7079 NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
7062 7080 args->share_access, args->share_deny, resp, 0);
7063 7081 }
7064 7082
7065 7083 static void
7066 7084 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7067 7085 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7068 7086 {
7069 7087 int error;
7070 7088 nfsstat4 status;
7071 7089 stateid4 stateid =
7072 7090 args->open_claim4_u.delegate_cur_info.delegate_stateid;
7073 7091 rfs4_deleg_state_t *dsp;
7074 7092
7075 7093 /*
7076 7094 * Find the state info from the stateid and confirm that the
7077 7095 * file is delegated. If the state openowner is the same as
7078 7096 * the supplied openowner we're done. If not, get the file
7079 7097 * info from the found state info. Use that file info to
7080 7098 * create the state for this lock owner. Note solaris doen't
7081 7099 * really need the pathname to find the file. We may want to
7082 7100 * lookup the pathname and make sure that the vp exist and
7083 7101 * matches the vp in the file structure. However it is
7084 7102 * possible that the pathname nolonger exists (local process
7085 7103 * unlinks the file), so this may not be that useful.
7086 7104 */
7087 7105
7088 7106 status = rfs4_get_deleg_state(&stateid, &dsp);
7089 7107 if (status != NFS4_OK) {
7090 7108 resp->status = status;
7091 7109 return;
7092 7110 }
7093 7111
7094 7112 ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7095 7113
7096 7114 /*
7097 7115 * New lock owner, create state. Since this was probably called
7098 7116 * in response to a CB_RECALL we set deleg to DELEG_NONE
7099 7117 */
7100 7118
7101 7119 ASSERT(cs->vp != NULL);
7102 7120 VN_RELE(cs->vp);
7103 7121 VN_HOLD(dsp->rds_finfo->rf_vp);
7104 7122 cs->vp = dsp->rds_finfo->rf_vp;
7105 7123
7106 7124 if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
7107 7125 rfs4_deleg_state_rele(dsp);
7108 7126 *cs->statusp = resp->status = puterrno4(error);
7109 7127 return;
7110 7128 }
7111 7129
7112 7130 /* Mark progress for delegation returns */
7113 7131 dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7114 7132 rfs4_deleg_state_rele(dsp);
7115 7133 rfs4_do_open(cs, req, oo, DELEG_NONE,
7116 7134 args->share_access, args->share_deny, resp, 1);
7117 7135 }
7118 7136
7119 7137 /*ARGSUSED*/
7120 7138 static void
7121 7139 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7122 7140 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7123 7141 {
7124 7142 /*
7125 7143 * Lookup the pathname, it must already exist since this file
7126 7144 * was delegated.
7127 7145 *
7128 7146 * Find the file and state info for this vp and open owner pair.
7129 7147 * check that they are in fact delegated.
7130 7148 * check that the state access and deny modes are the same.
7131 7149 *
7132 7150 * Return the delgation possibly seting the recall flag.
7133 7151 */
7134 7152 rfs4_file_t *fp;
7135 7153 rfs4_state_t *sp;
7136 7154 bool_t create = FALSE;
7137 7155 bool_t dcreate = FALSE;
7138 7156 rfs4_deleg_state_t *dsp;
7139 7157 nfsace4 *ace;
7140 7158
7141 7159 /* Note we ignore oflags */
7142 7160 resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
7143 7161 req, cs, args->share_access, &resp->cinfo);
7144 7162
7145 7163 if (resp->status != NFS4_OK) {
7146 7164 return;
7147 7165 }
7148 7166
7149 7167 /* get the file struct and hold a lock on it during initial open */
7150 7168 fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7151 7169 if (fp == NULL) {
7152 7170 resp->status = NFS4ERR_RESOURCE;
7153 7171 DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7154 7172 return;
7155 7173 }
7156 7174
7157 7175 sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7158 7176 if (sp == NULL) {
7159 7177 resp->status = NFS4ERR_SERVERFAULT;
7160 7178 DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7161 7179 rw_exit(&fp->rf_file_rwlock);
7162 7180 rfs4_file_rele(fp);
7163 7181 return;
7164 7182 }
7165 7183
7166 7184 rfs4_dbe_lock(sp->rs_dbe);
7167 7185 rfs4_dbe_lock(fp->rf_dbe);
7168 7186 if (args->share_access != sp->rs_share_access ||
7169 7187 args->share_deny != sp->rs_share_deny ||
7170 7188 sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7171 7189 NFS4_DEBUG(rfs4_debug,
7172 7190 (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7173 7191 rfs4_dbe_unlock(fp->rf_dbe);
7174 7192 rfs4_dbe_unlock(sp->rs_dbe);
7175 7193 rfs4_file_rele(fp);
7176 7194 rfs4_state_rele(sp);
7177 7195 resp->status = NFS4ERR_SERVERFAULT;
7178 7196 return;
7179 7197 }
7180 7198 rfs4_dbe_unlock(fp->rf_dbe);
7181 7199 rfs4_dbe_unlock(sp->rs_dbe);
7182 7200
7183 7201 dsp = rfs4_finddeleg(sp, &dcreate);
7184 7202 if (dsp == NULL) {
7185 7203 rfs4_state_rele(sp);
7186 7204 rfs4_file_rele(fp);
7187 7205 resp->status = NFS4ERR_SERVERFAULT;
7188 7206 return;
7189 7207 }
7190 7208
7191 7209 next_stateid(&sp->rs_stateid);
7192 7210
7193 7211 resp->stateid = sp->rs_stateid.stateid;
7194 7212
7195 7213 resp->delegation.delegation_type = dsp->rds_dtype;
7196 7214
7197 7215 if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7198 7216 open_read_delegation4 *rv =
7199 7217 &resp->delegation.open_delegation4_u.read;
7200 7218
7201 7219 rv->stateid = dsp->rds_delegid.stateid;
7202 7220 rv->recall = FALSE; /* no policy in place to set to TRUE */
7203 7221 ace = &rv->permissions;
7204 7222 } else {
7205 7223 open_write_delegation4 *rv =
7206 7224 &resp->delegation.open_delegation4_u.write;
7207 7225
7208 7226 rv->stateid = dsp->rds_delegid.stateid;
7209 7227 rv->recall = FALSE; /* no policy in place to set to TRUE */
7210 7228 ace = &rv->permissions;
7211 7229 rv->space_limit.limitby = NFS_LIMIT_SIZE;
7212 7230 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7213 7231 }
7214 7232
7215 7233 /* XXX For now */
7216 7234 ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7217 7235 ace->flag = 0;
7218 7236 ace->access_mask = 0;
7219 7237 ace->who.utf8string_len = 0;
7220 7238 ace->who.utf8string_val = 0;
7221 7239
7222 7240 rfs4_deleg_state_rele(dsp);
7223 7241 rfs4_state_rele(sp);
7224 7242 rfs4_file_rele(fp);
7225 7243 }
7226 7244
/*
 * Result of a sequence id check (see rfs4_check_seqid()).
 */
typedef enum {
	NFS4_CHKSEQ_OKAY = 0,	/* request carries the next expected seqid */
	NFS4_CHKSEQ_REPLAY,	/* same seqid and same operation as last time */
	NFS4_CHKSEQ_BAD		/* anything else */
} rfs4_chkseq_t;
7232 7250
7233 7251 /*
7234 7252 * Generic function for sequence number checks.
7235 7253 */
7236 7254 static rfs4_chkseq_t
7237 7255 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7238 7256 seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7239 7257 {
7240 7258 /* Same sequence ids and matching operations? */
7241 7259 if (seqid == rqst_seq && resop->resop == lastop->resop) {
7242 7260 if (copyres == TRUE) {
7243 7261 rfs4_free_reply(resop);
7244 7262 rfs4_copy_reply(resop, lastop);
7245 7263 }
7246 7264 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7247 7265 "Replayed SEQID %d\n", seqid));
7248 7266 return (NFS4_CHKSEQ_REPLAY);
7249 7267 }
7250 7268
7251 7269 /* If the incoming sequence is not the next expected then it is bad */
7252 7270 if (rqst_seq != seqid + 1) {
7253 7271 if (rqst_seq == seqid) {
7254 7272 NFS4_DEBUG(rfs4_debug,
7255 7273 (CE_NOTE, "BAD SEQID: Replayed sequence id "
7256 7274 "but last op was %d current op is %d\n",
7257 7275 lastop->resop, resop->resop));
7258 7276 return (NFS4_CHKSEQ_BAD);
7259 7277 }
7260 7278 NFS4_DEBUG(rfs4_debug,
7261 7279 (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7262 7280 rqst_seq, seqid));
7263 7281 return (NFS4_CHKSEQ_BAD);
7264 7282 }
7265 7283
7266 7284 /* Everything okay -- next expected */
7267 7285 return (NFS4_CHKSEQ_OKAY);
7268 7286 }
7269 7287
7270 7288
7271 7289 static rfs4_chkseq_t
7272 7290 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7273 7291 {
7274 7292 rfs4_chkseq_t rc;
7275 7293
7276 7294 rfs4_dbe_lock(op->ro_dbe);
7277 7295 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7278 7296 TRUE);
7279 7297 rfs4_dbe_unlock(op->ro_dbe);
7280 7298
7281 7299 if (rc == NFS4_CHKSEQ_OKAY)
7282 7300 rfs4_update_lease(op->ro_client);
7283 7301
7284 7302 return (rc);
7285 7303 }
7286 7304
7287 7305 static rfs4_chkseq_t
7288 7306 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7289 7307 {
7290 7308 rfs4_chkseq_t rc;
7291 7309
7292 7310 rfs4_dbe_lock(op->ro_dbe);
7293 7311 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7294 7312 olo_seqid, resop, FALSE);
7295 7313 rfs4_dbe_unlock(op->ro_dbe);
7296 7314
7297 7315 return (rc);
7298 7316 }
7299 7317
7300 7318 static rfs4_chkseq_t
7301 7319 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7302 7320 {
7303 7321 rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7304 7322
7305 7323 rfs4_dbe_lock(lsp->rls_dbe);
7306 7324 if (!lsp->rls_skip_seqid_check)
7307 7325 rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7308 7326 resop, TRUE);
7309 7327 rfs4_dbe_unlock(lsp->rls_dbe);
7310 7328
7311 7329 return (rc);
7312 7330 }
7313 7331
/*
 * rfs4_op_open: handler for the NFSv4 OPEN operation.
 *
 * Validates the current filehandle, the client id and its lease, finds or
 * creates the open owner, and serializes on the owner's ro_sw while the
 * owner's sequence id is checked.  After the grace-period, mount-point
 * and share-mode argument checks it dispatches on args->claim to one of
 * the rfs4_do_open*() helpers.  On the way out it decides, based on the
 * resulting status, whether to refresh the lease, save the reply for
 * replay detection, restore the filehandle for a replayed request, and
 * advance the owner's sequence id.
 *
 * The result is returned in resop (resp->status) and *cs->statusp.
 */
7314 7332 static void
7315 7333 rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
7316 7334 struct svc_req *req, struct compound_state *cs)
7317 7335 {
7318 7336 OPEN4args *args = &argop->nfs_argop4_u.opopen;
7319 7337 OPEN4res *resp = &resop->nfs_resop4_u.opopen;
7320 7338 open_owner4 *owner = &args->owner;
7321 7339 open_claim_type4 claim = args->claim;
7322 7340 rfs4_client_t *cp;
7323 7341 rfs4_openowner_t *oo;
7324 7342 bool_t create;
7325 7343 bool_t replay = FALSE;
7326 7344 int can_reclaim;
7327 7345
7328 7346 DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
7329 7347 OPEN4args *, args);
7330 7348
7331 7349 if (cs->vp == NULL) {
7332 7350 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7333 7351 goto end;
7334 7352 }
7335 7353
7336 7354 /*
7337 7355 * Need to check clientid and lease expiration first based on
7338 7356 * error ordering and incrementing sequence id.
7339 7357 */
7340 7358 cp = rfs4_findclient_by_id(owner->clientid, FALSE);
7341 7359 if (cp == NULL) {
7342 7360 *cs->statusp = resp->status =
7343 7361 rfs4_check_clientid(&owner->clientid, 0);
7344 7362 goto end;
7345 7363 }
7346 7364
7347 7365 if (rfs4_lease_expired(cp)) {
7348 7366 rfs4_client_close(cp);
7349 7367 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7350 7368 goto end;
7351 7369 }
7352 7370 can_reclaim = cp->rc_can_reclaim;
7353 7371
7354 7372 /*
7355 7373 * Find the open_owner for use from this point forward. Take
7356 7374 * care in updating the sequence id based on the type of error
7357 7375 * being returned.
7358 7376 */
/*
 * retry: re-entered after an unconfirmed open owner has had its opens
 * freed and has been released below.
 */
7359 7377 retry:
7360 7378 create = TRUE;
7361 7379 oo = rfs4_findopenowner(owner, &create, args->seqid);
7362 7380 if (oo == NULL) {
7363 7381 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
7364 7382 rfs4_client_rele(cp);
7365 7383 goto end;
7366 7384 }
7367 7385
7368 7386 /* Hold off access to the sequence space while the open is done */
7369 7387 rfs4_sw_enter(&oo->ro_sw);
7370 7388
7371 7389 /*
7372 7390 * If the open_owner existed before at the server, then check
7373 7391 * the sequence id.
7374 7392 */
7375 7393 if (!create && !oo->ro_postpone_confirm) {
7376 7394 switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
7377 7395 case NFS4_CHKSEQ_BAD:
7378 7396 if ((args->seqid > oo->ro_open_seqid) &&
7379 7397 oo->ro_need_confirm) {
7380 7398 rfs4_free_opens(oo, TRUE, FALSE);
7381 7399 rfs4_sw_exit(&oo->ro_sw);
7382 7400 rfs4_openowner_rele(oo);
7383 7401 goto retry;
7384 7402 }
7385 7403 resp->status = NFS4ERR_BAD_SEQID;
7386 7404 goto out;
7387 7405 case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
7388 7406 replay = TRUE;
7389 7407 goto out;
7390 7408 default:
7391 7409 break;
7392 7410 }
7393 7411
7394 7412 /*
7395 7413 * Sequence was ok and open owner exists
7396 7414 * check to see if we have yet to see an
7397 7415 * open_confirm.
7398 7416 */
7399 7417 if (oo->ro_need_confirm) {
7400 7418 rfs4_free_opens(oo, TRUE, FALSE);
7401 7419 rfs4_sw_exit(&oo->ro_sw);
7402 7420 rfs4_openowner_rele(oo);
7403 7421 goto retry;
7404 7422 }
7405 7423 }
7406 7424 /* Grace only applies to regular-type OPENs */
7407 7425 if (rfs4_clnt_in_grace(cp) &&
7408 7426 (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
7409 7427 *cs->statusp = resp->status = NFS4ERR_GRACE;
7410 7428 goto out;
7411 7429 }
7412 7430
7413 7431 /*
7414 7432 * If previous state at the server existed then can_reclaim
7415 7433 * will be set. If not reply NFS4ERR_NO_GRACE to the
7416 7434 * client.
7417 7435 */
7418 7436 if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
7419 7437 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7420 7438 goto out;
7421 7439 }
7422 7440
7423 7441
7424 7442 /*
7425 7443 * Reject the open if the client has missed the grace period
7426 7444 */
7427 7445 if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
7428 7446 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7429 7447 goto out;
7430 7448 }
7431 7449
7432 7450 /* Couple of up-front bookkeeping items */
7433 7451 if (oo->ro_need_confirm) {
7434 7452 /*
7435 7453 * If this is a reclaim OPEN then we should not ask
7436 7454 * for a confirmation of the open_owner per the
7437 7455 * protocol specification.
7438 7456 */
7439 7457 if (claim == CLAIM_PREVIOUS)
7440 7458 oo->ro_need_confirm = FALSE;
7441 7459 else
7442 7460 resp->rflags |= OPEN4_RESULT_CONFIRM;
7443 7461 }
7444 7462 resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;
7445 7463
7446 7464 /*
7447 7465 * If there is an unshared filesystem mounted on this vnode,
7448 7466 * do not allow to open/create in this directory.
7449 7467 */
7450 7468 if (vn_ismntpt(cs->vp)) {
7451 7469 *cs->statusp = resp->status = NFS4ERR_ACCESS;
7452 7470 goto out;
7453 7471 }
7454 7472
7455 7473 /*
7456 7474 * access must be READ, WRITE, or BOTH. No access is invalid.
7457 7475 * deny can be READ, WRITE, BOTH, or NONE.
7458 7476 * bits not defined for access/deny are invalid.
7459 7477 */
7460 7478 if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
7461 7479 (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
7462 7480 (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
7463 7481 *cs->statusp = resp->status = NFS4ERR_INVAL;
7464 7482 goto out;
7465 7483 }
7466 7484
7467 7485
7468 7486 /*
7469 7487 * make sure attrset is zero before response is built.
7470 7488 */
7471 7489 resp->attrset = 0;
7472 7490
/* Each claim type has its own helper; all of them report via resp->status */
7473 7491 switch (claim) {
7474 7492 case CLAIM_NULL:
7475 7493 rfs4_do_opennull(cs, req, args, oo, resp);
7476 7494 break;
7477 7495 case CLAIM_PREVIOUS:
7478 7496 rfs4_do_openprev(cs, req, args, oo, resp);
7479 7497 break;
7480 7498 case CLAIM_DELEGATE_CUR:
7481 7499 rfs4_do_opendelcur(cs, req, args, oo, resp);
7482 7500 break;
7483 7501 case CLAIM_DELEGATE_PREV:
7484 7502 rfs4_do_opendelprev(cs, req, args, oo, resp);
7485 7503 break;
7486 7504 default:
7487 7505 resp->status = NFS4ERR_INVAL;
7488 7506 break;
7489 7507 }
7490 7508
/* out: the client reference is dropped; oo is still held and ro_sw entered */
7491 7509 out:
7492 7510 rfs4_client_rele(cp);
7493 7511
7494 7512 /* Catch sequence id handling here to make it a little easier */
7495 7513 switch (resp->status) {
7496 7514 case NFS4ERR_BADXDR:
7497 7515 case NFS4ERR_BAD_SEQID:
7498 7516 case NFS4ERR_BAD_STATEID:
7499 7517 case NFS4ERR_NOFILEHANDLE:
7500 7518 case NFS4ERR_RESOURCE:
7501 7519 case NFS4ERR_STALE_CLIENTID:
7502 7520 case NFS4ERR_STALE_STATEID:
7503 7521 /*
7504 7522 * The protocol states that if any of these errors are
7505 7523 * being returned, the sequence id should not be
7506 7524 * incremented. Any other return requires an
7507 7525 * increment.
7508 7526 */
7509 7527 break;
7510 7528 default:
7511 7529 /* Always update the lease in this case */
7512 7530 rfs4_update_lease(oo->ro_client);
7513 7531
7514 7532 /* Regular response - copy the result */
7515 7533 if (!replay)
7516 7534 rfs4_update_open_resp(oo, resop, &cs->fh);
7517 7535
7518 7536 /*
7519 7537 * REPLAY case: Only if the previous response was OK
7520 7538 * do we copy the filehandle. If not OK, no
7521 7539 * filehandle to copy.
7522 7540 */
7523 7541 if (replay == TRUE &&
7524 7542 resp->status == NFS4_OK &&
7525 7543 oo->ro_reply_fh.nfs_fh4_val) {
7526 7544 /*
7527 7545 * If this is a replay, we must restore the
7528 7546 * current filehandle/vp to that of what was
7529 7547 * returned originally. Try our best to do
7530 7548 * it.
7531 7549 */
7532 7550 nfs_fh4_fmt_t *fh_fmtp =
7533 7551 (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;
7534 7552
7535 7553 cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
7536 7554 (fid_t *)&fh_fmtp->fh4_xlen, NULL);
7537 7555
7538 7556 if (cs->exi == NULL) {
7539 7557 resp->status = NFS4ERR_STALE;
7540 7558 goto finish;
7541 7559 }
7542 7560
7543 7561 VN_RELE(cs->vp);
7544 7562
7545 7563 cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
7546 7564 &resp->status);
7547 7565
7548 7566 if (cs->vp == NULL)
7549 7567 goto finish;
7550 7568
7551 7569 nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
7552 7570 }
7553 7571
7554 7572 /*
7555 7573 * If this was a replay, no need to update the
7556 7574 * sequence id. If the open_owner was not created on
7557 7575 * this pass, then update. The first use of an
7558 7576 * open_owner will not bump the sequence id.
7559 7577 */
7560 7578 if (replay == FALSE && !create)
7561 7579 rfs4_update_open_sequence(oo);
7562 7580 /*
7563 7581 * If the client is receiving an error and the
7564 7582 * open_owner needs to be confirmed, there is no way
7565 7583 * to notify the client of this fact ignoring the fact
7566 7584 * that the server has no method of returning a
7567 7585 * stateid to confirm. Therefore, the server needs to
7568 7586 * mark this open_owner in a way as to avoid the
7569 7587 * sequence id checking the next time the client uses
7570 7588 * this open_owner.
7571 7589 */
7572 7590 if (resp->status != NFS4_OK && oo->ro_need_confirm)
7573 7591 oo->ro_postpone_confirm = TRUE;
7574 7592 /*
7575 7593 * If OK response then clear the postpone flag and
7576 7594 * reset the sequence id to keep in sync with the
7577 7595 * client.
7578 7596 */
7579 7597 if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
7580 7598 oo->ro_postpone_confirm = FALSE;
7581 7599 oo->ro_open_seqid = args->seqid;
7582 7600 }
7583 7601 break;
7584 7602 }
7585 7603
/* finish: publish the final status and let go of the open owner */
7586 7604 finish:
7587 7605 *cs->statusp = resp->status;
7588 7606
7589 7607 rfs4_sw_exit(&oo->ro_sw);
7590 7608 rfs4_openowner_rele(oo);
7591 7609
/* end: reached directly by the early failures that hold no open owner */
7592 7610 end:
7593 7611 DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
7594 7612 OPEN4res *, resp);
7595 7613 }
7596 7614
7597 7615 /*ARGSUSED*/
7598 7616 void
7599 7617 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7600 7618 struct svc_req *req, struct compound_state *cs)
7601 7619 {
7602 7620 OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7603 7621 OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7604 7622 rfs4_state_t *sp;
7605 7623 nfsstat4 status;
7606 7624
7607 7625 DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7608 7626 OPEN_CONFIRM4args *, args);
7609 7627
7610 7628 if (cs->vp == NULL) {
7611 7629 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7612 7630 goto out;
7613 7631 }
7614 7632
7615 7633 if (cs->vp->v_type != VREG) {
7616 7634 *cs->statusp = resp->status =
7617 7635 cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7618 7636 return;
7619 7637 }
7620 7638
7621 7639 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7622 7640 if (status != NFS4_OK) {
7623 7641 *cs->statusp = resp->status = status;
7624 7642 goto out;
7625 7643 }
7626 7644
7627 7645 /* Ensure specified filehandle matches */
7628 7646 if (cs->vp != sp->rs_finfo->rf_vp) {
7629 7647 rfs4_state_rele(sp);
7630 7648 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7631 7649 goto out;
7632 7650 }
7633 7651
7634 7652 /* hold off other access to open_owner while we tinker */
7635 7653 rfs4_sw_enter(&sp->rs_owner->ro_sw);
7636 7654
7637 7655 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7638 7656 case NFS4_CHECK_STATEID_OKAY:
7639 7657 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7640 7658 resop) != 0) {
7641 7659 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7642 7660 break;
7643 7661 }
7644 7662 /*
7645 7663 * If it is the appropriate stateid and determined to
7646 7664 * be "OKAY" then this means that the stateid does not
7647 7665 * need to be confirmed and the client is in error for
7648 7666 * sending an OPEN_CONFIRM.
7649 7667 */
7650 7668 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7651 7669 break;
7652 7670 case NFS4_CHECK_STATEID_OLD:
7653 7671 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7654 7672 break;
7655 7673 case NFS4_CHECK_STATEID_BAD:
7656 7674 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7657 7675 break;
7658 7676 case NFS4_CHECK_STATEID_EXPIRED:
7659 7677 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7660 7678 break;
7661 7679 case NFS4_CHECK_STATEID_CLOSED:
7662 7680 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7663 7681 break;
7664 7682 case NFS4_CHECK_STATEID_REPLAY:
7665 7683 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7666 7684 resop)) {
7667 7685 case NFS4_CHKSEQ_OKAY:
7668 7686 /*
7669 7687 * This is replayed stateid; if seqid matches
7670 7688 * next expected, then client is using wrong seqid.
7671 7689 */
7672 7690 /* fall through */
7673 7691 case NFS4_CHKSEQ_BAD:
7674 7692 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7675 7693 break;
7676 7694 case NFS4_CHKSEQ_REPLAY:
7677 7695 /*
7678 7696 * Note this case is the duplicate case so
7679 7697 * resp->status is already set.
7680 7698 */
7681 7699 *cs->statusp = resp->status;
7682 7700 rfs4_update_lease(sp->rs_owner->ro_client);
7683 7701 break;
7684 7702 }
7685 7703 break;
7686 7704 case NFS4_CHECK_STATEID_UNCONFIRMED:
7687 7705 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7688 7706 resop) != NFS4_CHKSEQ_OKAY) {
7689 7707 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7690 7708 break;
7691 7709 }
7692 7710 *cs->statusp = resp->status = NFS4_OK;
7693 7711
7694 7712 next_stateid(&sp->rs_stateid);
7695 7713 resp->open_stateid = sp->rs_stateid.stateid;
7696 7714 sp->rs_owner->ro_need_confirm = FALSE;
7697 7715 rfs4_update_lease(sp->rs_owner->ro_client);
7698 7716 rfs4_update_open_sequence(sp->rs_owner);
7699 7717 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7700 7718 break;
7701 7719 default:
7702 7720 ASSERT(FALSE);
7703 7721 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7704 7722 break;
7705 7723 }
7706 7724 rfs4_sw_exit(&sp->rs_owner->ro_sw);
7707 7725 rfs4_state_rele(sp);
7708 7726
7709 7727 out:
7710 7728 DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7711 7729 OPEN_CONFIRM4res *, resp);
7712 7730 }
7713 7731
7714 7732 /*ARGSUSED*/
7715 7733 void
7716 7734 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7717 7735 struct svc_req *req, struct compound_state *cs)
7718 7736 {
7719 7737 OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7720 7738 OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7721 7739 uint32_t access = args->share_access;
7722 7740 uint32_t deny = args->share_deny;
7723 7741 nfsstat4 status;
7724 7742 rfs4_state_t *sp;
7725 7743 rfs4_file_t *fp;
7726 7744 int fflags = 0;
7727 7745
7728 7746 DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7729 7747 OPEN_DOWNGRADE4args *, args);
7730 7748
7731 7749 if (cs->vp == NULL) {
7732 7750 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7733 7751 goto out;
7734 7752 }
7735 7753
7736 7754 if (cs->vp->v_type != VREG) {
7737 7755 *cs->statusp = resp->status = NFS4ERR_INVAL;
7738 7756 return;
7739 7757 }
7740 7758
7741 7759 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7742 7760 if (status != NFS4_OK) {
7743 7761 *cs->statusp = resp->status = status;
7744 7762 goto out;
7745 7763 }
7746 7764
7747 7765 /* Ensure specified filehandle matches */
7748 7766 if (cs->vp != sp->rs_finfo->rf_vp) {
7749 7767 rfs4_state_rele(sp);
7750 7768 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7751 7769 goto out;
7752 7770 }
7753 7771
7754 7772 /* hold off other access to open_owner while we tinker */
7755 7773 rfs4_sw_enter(&sp->rs_owner->ro_sw);
7756 7774
7757 7775 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7758 7776 case NFS4_CHECK_STATEID_OKAY:
7759 7777 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7760 7778 resop) != NFS4_CHKSEQ_OKAY) {
7761 7779 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7762 7780 goto end;
7763 7781 }
7764 7782 break;
7765 7783 case NFS4_CHECK_STATEID_OLD:
7766 7784 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7767 7785 goto end;
7768 7786 case NFS4_CHECK_STATEID_BAD:
7769 7787 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7770 7788 goto end;
7771 7789 case NFS4_CHECK_STATEID_EXPIRED:
7772 7790 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7773 7791 goto end;
7774 7792 case NFS4_CHECK_STATEID_CLOSED:
7775 7793 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7776 7794 goto end;
7777 7795 case NFS4_CHECK_STATEID_UNCONFIRMED:
7778 7796 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7779 7797 goto end;
7780 7798 case NFS4_CHECK_STATEID_REPLAY:
7781 7799 /* Check the sequence id for the open owner */
7782 7800 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7783 7801 resop)) {
7784 7802 case NFS4_CHKSEQ_OKAY:
7785 7803 /*
7786 7804 * This is replayed stateid; if seqid matches
7787 7805 * next expected, then client is using wrong seqid.
7788 7806 */
7789 7807 /* fall through */
7790 7808 case NFS4_CHKSEQ_BAD:
7791 7809 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7792 7810 goto end;
7793 7811 case NFS4_CHKSEQ_REPLAY:
7794 7812 /*
7795 7813 * Note this case is the duplicate case so
7796 7814 * resp->status is already set.
7797 7815 */
7798 7816 *cs->statusp = resp->status;
7799 7817 rfs4_update_lease(sp->rs_owner->ro_client);
7800 7818 goto end;
7801 7819 }
7802 7820 break;
7803 7821 default:
7804 7822 ASSERT(FALSE);
7805 7823 break;
7806 7824 }
7807 7825
7808 7826 rfs4_dbe_lock(sp->rs_dbe);
7809 7827 /*
7810 7828 * Check that the new access modes and deny modes are valid.
7811 7829 * Check that no invalid bits are set.
7812 7830 */
7813 7831 if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
7814 7832 (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
7815 7833 *cs->statusp = resp->status = NFS4ERR_INVAL;
7816 7834 rfs4_update_open_sequence(sp->rs_owner);
7817 7835 rfs4_dbe_unlock(sp->rs_dbe);
7818 7836 goto end;
7819 7837 }
7820 7838
7821 7839 /*
7822 7840 * The new modes must be a subset of the current modes and
7823 7841 * the access must specify at least one mode. To test that
7824 7842 * the new mode is a subset of the current modes we bitwise
7825 7843 * AND them together and check that the result equals the new
7826 7844 * mode. For example:
7827 7845 * New mode, access == R and current mode, sp->rs_open_access == RW
7828 7846 * access & sp->rs_open_access == R == access, so the new access mode
7829 7847 * is valid. Consider access == RW, sp->rs_open_access = R
7830 7848 * access & sp->rs_open_access == R != access, so the new access mode
7831 7849 * is invalid.
7832 7850 */
7833 7851 if ((access & sp->rs_open_access) != access ||
7834 7852 (deny & sp->rs_open_deny) != deny ||
7835 7853 (access &
7836 7854 (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
7837 7855 *cs->statusp = resp->status = NFS4ERR_INVAL;
7838 7856 rfs4_update_open_sequence(sp->rs_owner);
7839 7857 rfs4_dbe_unlock(sp->rs_dbe);
7840 7858 goto end;
7841 7859 }
7842 7860
7843 7861 /*
7844 7862 * Release any share locks associated with this stateID.
7845 7863 * Strictly speaking, this violates the spec because the
7846 7864 * spec effectively requires that open downgrade be atomic.
7847 7865 * At present, fs_shrlock does not have this capability.
7848 7866 */
7849 7867 (void) rfs4_unshare(sp);
7850 7868
7851 7869 status = rfs4_share(sp, access, deny);
7852 7870 if (status != NFS4_OK) {
7853 7871 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7854 7872 rfs4_update_open_sequence(sp->rs_owner);
7855 7873 rfs4_dbe_unlock(sp->rs_dbe);
7856 7874 goto end;
7857 7875 }
7858 7876
7859 7877 fp = sp->rs_finfo;
7860 7878 rfs4_dbe_lock(fp->rf_dbe);
7861 7879
7862 7880 /*
7863 7881 * If the current mode has deny read and the new mode
7864 7882 * does not, decrement the number of deny read mode bits
7865 7883 * and if it goes to zero turn off the deny read bit
7866 7884 * on the file.
7867 7885 */
7868 7886 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
7869 7887 (deny & OPEN4_SHARE_DENY_READ) == 0) {
7870 7888 fp->rf_deny_read--;
7871 7889 if (fp->rf_deny_read == 0)
7872 7890 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
7873 7891 }
7874 7892
7875 7893 /*
7876 7894 * If the current mode has deny write and the new mode
7877 7895 * does not, decrement the number of deny write mode bits
7878 7896 * and if it goes to zero turn off the deny write bit
7879 7897 * on the file.
7880 7898 */
7881 7899 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
7882 7900 (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
7883 7901 fp->rf_deny_write--;
7884 7902 if (fp->rf_deny_write == 0)
7885 7903 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7886 7904 }
7887 7905
7888 7906 /*
7889 7907 * If the current mode has access read and the new mode
7890 7908 * does not, decrement the number of access read mode bits
7891 7909 * and if it goes to zero turn off the access read bit
7892 7910 * on the file. set fflags to FREAD for the call to
7893 7911 * vn_open_downgrade().
7894 7912 */
7895 7913 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
7896 7914 (access & OPEN4_SHARE_ACCESS_READ) == 0) {
7897 7915 fp->rf_access_read--;
7898 7916 if (fp->rf_access_read == 0)
7899 7917 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
7900 7918 fflags |= FREAD;
7901 7919 }
7902 7920
7903 7921 /*
7904 7922 * If the current mode has access write and the new mode
7905 7923 * does not, decrement the number of access write mode bits
7906 7924 * and if it goes to zero turn off the access write bit
7907 7925 * on the file. set fflags to FWRITE for the call to
7908 7926 * vn_open_downgrade().
7909 7927 */
7910 7928 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
7911 7929 (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7912 7930 fp->rf_access_write--;
7913 7931 if (fp->rf_access_write == 0)
7914 7932 fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
7915 7933 fflags |= FWRITE;
7916 7934 }
7917 7935
7918 7936 /* Check that the file is still accessible */
7919 7937 ASSERT(fp->rf_share_access);
7920 7938
7921 7939 rfs4_dbe_unlock(fp->rf_dbe);
7922 7940
7923 7941 /* now set the new open access and deny modes */
7924 7942 sp->rs_open_access = access;
7925 7943 sp->rs_open_deny = deny;
7926 7944
7927 7945 /*
7928 7946 * we successfully downgraded the share lock, now we need to downgrade
7929 7947 * the open. it is possible that the downgrade was only for a deny
7930 7948 * mode and we have nothing else to do.
7931 7949 */
7932 7950 if ((fflags & (FREAD|FWRITE)) != 0)
7933 7951 vn_open_downgrade(cs->vp, fflags);
7934 7952
7935 7953 /* Update the stateid */
7936 7954 next_stateid(&sp->rs_stateid);
7937 7955 resp->open_stateid = sp->rs_stateid.stateid;
7938 7956
7939 7957 rfs4_dbe_unlock(sp->rs_dbe);
7940 7958
7941 7959 *cs->statusp = resp->status = NFS4_OK;
7942 7960 /* Update the lease */
7943 7961 rfs4_update_lease(sp->rs_owner->ro_client);
7944 7962 /* And the sequence */
7945 7963 rfs4_update_open_sequence(sp->rs_owner);
7946 7964 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7947 7965
7948 7966 end:
7949 7967 rfs4_sw_exit(&sp->rs_owner->ro_sw);
7950 7968 rfs4_state_rele(sp);
7951 7969 out:
7952 7970 DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
7953 7971 OPEN_DOWNGRADE4res *, resp);
7954 7972 }
7955 7973
/*
 * Locate the NUL-terminated string s2 inside the first n bytes of the
 * (not necessarily NUL-terminated) buffer s1.  Returns a pointer to the
 * first match, or NULL when s2 does not occur within those n bytes.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
	char *cursor = (char *)s1;
	size_t needle_len = strlen(s2);
	size_t remaining;

	/* Stop once fewer bytes remain than the needle needs. */
	for (remaining = n; remaining >= needle_len; remaining--, cursor++) {
		if (bcmp(cursor, s2, needle_len) == 0)
			return (cursor);
	}

	return (NULL);
}
7971 7989
7972 7990 /*
7973 7991 * The logic behind this function is detailed in the NFSv4 RFC in the
7974 7992 * SETCLIENTID operation description under IMPLEMENTATION. Refer to
7975 7993 * that section for explicit guidance to server behavior for
7976 7994 * SETCLIENTID.
7977 7995 */
7978 7996 void
7979 7997 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
7980 7998 struct svc_req *req, struct compound_state *cs)
7981 7999 {
7982 8000 SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
7983 8001 SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
7984 8002 rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
7985 8003 rfs4_clntip_t *ci;
7986 8004 bool_t create;
7987 8005 char *addr, *netid;
7988 8006 int len;
7989 8007
7990 8008 DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
7991 8009 SETCLIENTID4args *, args);
7992 8010 retry:
7993 8011 newcp = cp_confirmed = cp_unconfirmed = NULL;
7994 8012
7995 8013 /*
7996 8014 * Save the caller's IP address
7997 8015 */
7998 8016 args->client.cl_addr =
7999 8017 (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
8000 8018
8001 8019 /*
8002 8020 * Record if it is a Solaris client that cannot handle referrals.
8003 8021 */
8004 8022 if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
8005 8023 !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
8006 8024 /* Add a "yes, it's downrev" record */
8007 8025 create = TRUE;
8008 8026 ci = rfs4_find_clntip(args->client.cl_addr, &create);
8009 8027 ASSERT(ci != NULL);
8010 8028 rfs4_dbe_rele(ci->ri_dbe);
8011 8029 } else {
8012 8030 /* Remove any previous record */
8013 8031 rfs4_invalidate_clntip(args->client.cl_addr);
8014 8032 }
8015 8033
8016 8034 /*
8017 8035 * In search of an EXISTING client matching the incoming
8018 8036 * request to establish a new client identifier at the server
8019 8037 */
8020 8038 create = TRUE;
8021 8039 cp = rfs4_findclient(&args->client, &create, NULL);
8022 8040
8023 8041 /* Should never happen */
8024 8042 ASSERT(cp != NULL);
8025 8043
8026 8044 if (cp == NULL) {
8027 8045 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8028 8046 goto out;
8029 8047 }
8030 8048
8031 8049 /*
8032 8050 * Easiest case. Client identifier is newly created and is
8033 8051 * unconfirmed. Also note that for this case, no other
8034 8052 * entries exist for the client identifier. Nothing else to
8035 8053 * check. Just setup the response and respond.
8036 8054 */
8037 8055 if (create) {
8038 8056 *cs->statusp = res->status = NFS4_OK;
8039 8057 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
8040 8058 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8041 8059 cp->rc_confirm_verf;
8042 8060 /* Setup callback information; CB_NULL confirmation later */
8043 8061 rfs4_client_setcb(cp, &args->callback, args->callback_ident);
8044 8062
8045 8063 rfs4_client_rele(cp);
8046 8064 goto out;
8047 8065 }
8048 8066
8049 8067 /*
8050 8068 * An existing, confirmed client may exist but it may not have
8051 8069 * been active for at least one lease period. If so, then
8052 8070 * "close" the client and create a new client identifier
8053 8071 */
8054 8072 if (rfs4_lease_expired(cp)) {
8055 8073 rfs4_client_close(cp);
8056 8074 goto retry;
8057 8075 }
8058 8076
8059 8077 if (cp->rc_need_confirm == TRUE)
8060 8078 cp_unconfirmed = cp;
8061 8079 else
8062 8080 cp_confirmed = cp;
8063 8081
8064 8082 cp = NULL;
8065 8083
8066 8084 /*
8067 8085 * We have a confirmed client, now check for an
8068 8086 * unconfimred entry
8069 8087 */
8070 8088 if (cp_confirmed) {
8071 8089 /* If creds don't match then client identifier is inuse */
8072 8090 if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
8073 8091 rfs4_cbinfo_t *cbp;
8074 8092 /*
8075 8093 * Some one else has established this client
8076 8094 * id. Try and say * who they are. We will use
8077 8095 * the call back address supplied by * the
8078 8096 * first client.
8079 8097 */
8080 8098 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8081 8099
8082 8100 addr = netid = NULL;
8083 8101
8084 8102 cbp = &cp_confirmed->rc_cbinfo;
8085 8103 if (cbp->cb_callback.cb_location.r_addr &&
8086 8104 cbp->cb_callback.cb_location.r_netid) {
8087 8105 cb_client4 *cbcp = &cbp->cb_callback;
8088 8106
8089 8107 len = strlen(cbcp->cb_location.r_addr)+1;
8090 8108 addr = kmem_alloc(len, KM_SLEEP);
8091 8109 bcopy(cbcp->cb_location.r_addr, addr, len);
8092 8110 len = strlen(cbcp->cb_location.r_netid)+1;
8093 8111 netid = kmem_alloc(len, KM_SLEEP);
8094 8112 bcopy(cbcp->cb_location.r_netid, netid, len);
8095 8113 }
8096 8114
8097 8115 res->SETCLIENTID4res_u.client_using.r_addr = addr;
8098 8116 res->SETCLIENTID4res_u.client_using.r_netid = netid;
8099 8117
8100 8118 rfs4_client_rele(cp_confirmed);
8101 8119 }
8102 8120
8103 8121 /*
8104 8122 * Confirmed, creds match, and verifier matches; must
8105 8123 * be an update of the callback info
8106 8124 */
8107 8125 if (cp_confirmed->rc_nfs_client.verifier ==
8108 8126 args->client.verifier) {
8109 8127 /* Setup callback information */
8110 8128 rfs4_client_setcb(cp_confirmed, &args->callback,
8111 8129 args->callback_ident);
8112 8130
8113 8131 /* everything okay -- move ahead */
8114 8132 *cs->statusp = res->status = NFS4_OK;
8115 8133 res->SETCLIENTID4res_u.resok4.clientid =
8116 8134 cp_confirmed->rc_clientid;
8117 8135
8118 8136 /* update the confirm_verifier and return it */
8119 8137 rfs4_client_scv_next(cp_confirmed);
8120 8138 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8121 8139 cp_confirmed->rc_confirm_verf;
8122 8140
8123 8141 rfs4_client_rele(cp_confirmed);
8124 8142 goto out;
8125 8143 }
8126 8144
8127 8145 /*
8128 8146 * Creds match but the verifier doesn't. Must search
8129 8147 * for an unconfirmed client that would be replaced by
8130 8148 * this request.
8131 8149 */
8132 8150 create = FALSE;
8133 8151 cp_unconfirmed = rfs4_findclient(&args->client, &create,
8134 8152 cp_confirmed);
8135 8153 }
8136 8154
8137 8155 /*
8138 8156 * At this point, we have taken care of the brand new client
8139 8157 * struct, INUSE case, update of an existing, and confirmed
8140 8158 * client struct.
8141 8159 */
8142 8160
8143 8161 /*
8144 8162 * check to see if things have changed while we originally
8145 8163 * picked up the client struct. If they have, then return and
8146 8164 * retry the processing of this SETCLIENTID request.
8147 8165 */
8148 8166 if (cp_unconfirmed) {
8149 8167 rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8150 8168 if (!cp_unconfirmed->rc_need_confirm) {
8151 8169 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8152 8170 rfs4_client_rele(cp_unconfirmed);
8153 8171 if (cp_confirmed)
8154 8172 rfs4_client_rele(cp_confirmed);
8155 8173 goto retry;
8156 8174 }
8157 8175 /* do away with the old unconfirmed one */
8158 8176 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8159 8177 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8160 8178 rfs4_client_rele(cp_unconfirmed);
8161 8179 cp_unconfirmed = NULL;
8162 8180 }
8163 8181
8164 8182 /*
8165 8183 * This search will temporarily hide the confirmed client
8166 8184 * struct while a new client struct is created as the
8167 8185 * unconfirmed one.
8168 8186 */
8169 8187 create = TRUE;
8170 8188 newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8171 8189
8172 8190 ASSERT(newcp != NULL);
8173 8191
8174 8192 if (newcp == NULL) {
8175 8193 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8176 8194 rfs4_client_rele(cp_confirmed);
8177 8195 goto out;
8178 8196 }
8179 8197
8180 8198 /*
8181 8199 * If one was not created, then a similar request must be in
8182 8200 * process so release and start over with this one
8183 8201 */
8184 8202 if (create != TRUE) {
8185 8203 rfs4_client_rele(newcp);
8186 8204 if (cp_confirmed)
8187 8205 rfs4_client_rele(cp_confirmed);
8188 8206 goto retry;
8189 8207 }
8190 8208
8191 8209 *cs->statusp = res->status = NFS4_OK;
8192 8210 res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8193 8211 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8194 8212 newcp->rc_confirm_verf;
8195 8213 /* Setup callback information; CB_NULL confirmation later */
8196 8214 rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8197 8215
8198 8216 newcp->rc_cp_confirmed = cp_confirmed;
8199 8217
8200 8218 rfs4_client_rele(newcp);
8201 8219
8202 8220 out:
8203 8221 DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8204 8222 SETCLIENTID4res *, res);
8205 8223 }
8206 8224
8207 8225 /*ARGSUSED*/
8208 8226 void
8209 8227 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8210 8228 struct svc_req *req, struct compound_state *cs)
8211 8229 {
8212 8230 SETCLIENTID_CONFIRM4args *args =
8213 8231 &argop->nfs_argop4_u.opsetclientid_confirm;
8214 8232 SETCLIENTID_CONFIRM4res *res =
8215 8233 &resop->nfs_resop4_u.opsetclientid_confirm;
8216 8234 rfs4_client_t *cp, *cptoclose = NULL;
8217 8235 nfs4_srv_t *nsrv4;
8218 8236
8219 8237 DTRACE_NFSV4_2(op__setclientid__confirm__start,
8220 8238 struct compound_state *, cs,
8221 8239 SETCLIENTID_CONFIRM4args *, args);
8222 8240
8223 8241 nsrv4 = nfs4_get_srv();
8224 8242 *cs->statusp = res->status = NFS4_OK;
8225 8243
8226 8244 cp = rfs4_findclient_by_id(args->clientid, TRUE);
8227 8245
8228 8246 if (cp == NULL) {
8229 8247 *cs->statusp = res->status =
8230 8248 rfs4_check_clientid(&args->clientid, 1);
8231 8249 goto out;
8232 8250 }
8233 8251
8234 8252 if (!creds_ok(cp, req, cs)) {
8235 8253 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8236 8254 rfs4_client_rele(cp);
8237 8255 goto out;
8238 8256 }
8239 8257
8240 8258 /* If the verifier doesn't match, the record doesn't match */
8241 8259 if (cp->rc_confirm_verf != args->setclientid_confirm) {
8242 8260 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8243 8261 rfs4_client_rele(cp);
8244 8262 goto out;
8245 8263 }
8246 8264
8247 8265 rfs4_dbe_lock(cp->rc_dbe);
8248 8266 cp->rc_need_confirm = FALSE;
8249 8267 if (cp->rc_cp_confirmed) {
8250 8268 cptoclose = cp->rc_cp_confirmed;
8251 8269 cptoclose->rc_ss_remove = 1;
8252 8270 cp->rc_cp_confirmed = NULL;
8253 8271 }
8254 8272
8255 8273 /*
8256 8274 * Update the client's associated server instance, if it's changed
8257 8275 * since the client was created.
8258 8276 */
8259 8277 if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst)
8260 8278 rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
8261 8279
8262 8280 /*
8263 8281 * Record clientid in stable storage.
8264 8282 * Must be done after server instance has been assigned.
8265 8283 */
8266 8284 rfs4_ss_clid(nsrv4, cp);
8267 8285
8268 8286 rfs4_dbe_unlock(cp->rc_dbe);
8269 8287
8270 8288 if (cptoclose)
8271 8289 /* don't need to rele, client_close does it */
8272 8290 rfs4_client_close(cptoclose);
8273 8291
8274 8292 /* If needed, initiate CB_NULL call for callback path */
8275 8293 rfs4_deleg_cb_check(cp);
8276 8294 rfs4_update_lease(cp);
8277 8295
8278 8296 /*
8279 8297 * Check to see if client can perform reclaims
8280 8298 */
8281 8299 rfs4_ss_chkclid(nsrv4, cp);
8282 8300
8283 8301 rfs4_client_rele(cp);
8284 8302
8285 8303 out:
8286 8304 DTRACE_NFSV4_2(op__setclientid__confirm__done,
8287 8305 struct compound_state *, cs,
8288 8306 SETCLIENTID_CONFIRM4 *, res);
8289 8307 }
8290 8308
8291 8309
8292 8310 /*ARGSUSED*/
8293 8311 void
8294 8312 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8295 8313 struct svc_req *req, struct compound_state *cs)
8296 8314 {
8297 8315 CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8298 8316 CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8299 8317 rfs4_state_t *sp;
8300 8318 nfsstat4 status;
8301 8319
8302 8320 DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8303 8321 CLOSE4args *, args);
8304 8322
8305 8323 if (cs->vp == NULL) {
8306 8324 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8307 8325 goto out;
8308 8326 }
8309 8327
8310 8328 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8311 8329 if (status != NFS4_OK) {
8312 8330 *cs->statusp = resp->status = status;
8313 8331 goto out;
8314 8332 }
8315 8333
8316 8334 /* Ensure specified filehandle matches */
8317 8335 if (cs->vp != sp->rs_finfo->rf_vp) {
8318 8336 rfs4_state_rele(sp);
8319 8337 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8320 8338 goto out;
8321 8339 }
8322 8340
8323 8341 /* hold off other access to open_owner while we tinker */
8324 8342 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8325 8343
8326 8344 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
8327 8345 case NFS4_CHECK_STATEID_OKAY:
8328 8346 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8329 8347 resop) != NFS4_CHKSEQ_OKAY) {
8330 8348 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8331 8349 goto end;
8332 8350 }
8333 8351 break;
8334 8352 case NFS4_CHECK_STATEID_OLD:
8335 8353 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8336 8354 goto end;
8337 8355 case NFS4_CHECK_STATEID_BAD:
8338 8356 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8339 8357 goto end;
8340 8358 case NFS4_CHECK_STATEID_EXPIRED:
8341 8359 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8342 8360 goto end;
8343 8361 case NFS4_CHECK_STATEID_CLOSED:
8344 8362 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8345 8363 goto end;
8346 8364 case NFS4_CHECK_STATEID_UNCONFIRMED:
8347 8365 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8348 8366 goto end;
8349 8367 case NFS4_CHECK_STATEID_REPLAY:
8350 8368 /* Check the sequence id for the open owner */
8351 8369 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8352 8370 resop)) {
8353 8371 case NFS4_CHKSEQ_OKAY:
8354 8372 /*
8355 8373 * This is replayed stateid; if seqid matches
8356 8374 * next expected, then client is using wrong seqid.
8357 8375 */
8358 8376 /* FALL THROUGH */
8359 8377 case NFS4_CHKSEQ_BAD:
8360 8378 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8361 8379 goto end;
8362 8380 case NFS4_CHKSEQ_REPLAY:
8363 8381 /*
8364 8382 * Note this case is the duplicate case so
8365 8383 * resp->status is already set.
8366 8384 */
8367 8385 *cs->statusp = resp->status;
8368 8386 rfs4_update_lease(sp->rs_owner->ro_client);
8369 8387 goto end;
8370 8388 }
8371 8389 break;
8372 8390 default:
8373 8391 ASSERT(FALSE);
8374 8392 break;
8375 8393 }
8376 8394
8377 8395 rfs4_dbe_lock(sp->rs_dbe);
8378 8396
8379 8397 /* Update the stateid. */
8380 8398 next_stateid(&sp->rs_stateid);
8381 8399 resp->open_stateid = sp->rs_stateid.stateid;
8382 8400
8383 8401 rfs4_dbe_unlock(sp->rs_dbe);
8384 8402
8385 8403 rfs4_update_lease(sp->rs_owner->ro_client);
8386 8404 rfs4_update_open_sequence(sp->rs_owner);
8387 8405 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8388 8406
8389 8407 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8390 8408
8391 8409 *cs->statusp = resp->status = status;
8392 8410
8393 8411 end:
8394 8412 rfs4_sw_exit(&sp->rs_owner->ro_sw);
8395 8413 rfs4_state_rele(sp);
8396 8414 out:
8397 8415 DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8398 8416 CLOSE4res *, resp);
8399 8417 }
8400 8418
8401 8419 /*
8402 8420 * Manage the counts on the file struct and close all file locks
8403 8421 */
8404 8422 /*ARGSUSED*/
8405 8423 void
8406 8424 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8407 8425 bool_t close_of_client)
8408 8426 {
8409 8427 rfs4_file_t *fp = sp->rs_finfo;
8410 8428 rfs4_lo_state_t *lsp;
8411 8429 int fflags = 0;
8412 8430
8413 8431 /*
8414 8432 * If this call is part of the larger closing down of client
8415 8433 * state then it is just easier to release all locks
8416 8434 * associated with this client instead of going through each
8417 8435 * individual file and cleaning locks there.
8418 8436 */
8419 8437 if (close_of_client) {
8420 8438 if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8421 8439 !list_is_empty(&sp->rs_lostatelist) &&
8422 8440 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8423 8441 /* Is the PxFS kernel module loaded? */
8424 8442 if (lm_remove_file_locks != NULL) {
8425 8443 int new_sysid;
8426 8444
8427 8445 /* Encode the cluster nodeid in new sysid */
8428 8446 new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8429 8447 lm_set_nlmid_flk(&new_sysid);
8430 8448
8431 8449 /*
8432 8450 * This PxFS routine removes file locks for a
8433 8451 * client over all nodes of a cluster.
8434 8452 */
8435 8453 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8436 8454 "lm_remove_file_locks(sysid=0x%x)\n",
8437 8455 new_sysid));
8438 8456 (*lm_remove_file_locks)(new_sysid);
8439 8457 } else {
8440 8458 struct flock64 flk;
8441 8459
8442 8460 /* Release all locks for this client */
8443 8461 flk.l_type = F_UNLKSYS;
8444 8462 flk.l_whence = 0;
8445 8463 flk.l_start = 0;
8446 8464 flk.l_len = 0;
8447 8465 flk.l_sysid =
8448 8466 sp->rs_owner->ro_client->rc_sysidt;
8449 8467 flk.l_pid = 0;
8450 8468 (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8451 8469 &flk, F_REMOTELOCK | FREAD | FWRITE,
8452 8470 (u_offset_t)0, NULL, CRED(), NULL);
8453 8471 }
8454 8472
8455 8473 sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8456 8474 }
8457 8475 }
8458 8476
8459 8477 /*
8460 8478 * Release all locks on this file by this lock owner or at
8461 8479 * least mark the locks as having been released
8462 8480 */
8463 8481 for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8464 8482 lsp = list_next(&sp->rs_lostatelist, lsp)) {
8465 8483 lsp->rls_locks_cleaned = TRUE;
8466 8484
8467 8485 /* Was this already taken care of above? */
8468 8486 if (!close_of_client &&
8469 8487 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8470 8488 (void) cleanlocks(sp->rs_finfo->rf_vp,
8471 8489 lsp->rls_locker->rl_pid,
8472 8490 lsp->rls_locker->rl_client->rc_sysidt);
8473 8491 }
8474 8492
8475 8493 /*
8476 8494 * Release any shrlocks associated with this open state ID.
8477 8495 * This must be done before the rfs4_state gets marked closed.
8478 8496 */
8479 8497 if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8480 8498 (void) rfs4_unshare(sp);
8481 8499
8482 8500 if (sp->rs_open_access) {
8483 8501 rfs4_dbe_lock(fp->rf_dbe);
8484 8502
8485 8503 /*
8486 8504 * Decrement the count for each access and deny bit that this
8487 8505 * state has contributed to the file.
8488 8506 * If the file counts go to zero
8489 8507 * clear the appropriate bit in the appropriate mask.
8490 8508 */
8491 8509 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8492 8510 fp->rf_access_read--;
8493 8511 fflags |= FREAD;
8494 8512 if (fp->rf_access_read == 0)
8495 8513 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8496 8514 }
8497 8515 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8498 8516 fp->rf_access_write--;
8499 8517 fflags |= FWRITE;
8500 8518 if (fp->rf_access_write == 0)
8501 8519 fp->rf_share_access &=
8502 8520 ~OPEN4_SHARE_ACCESS_WRITE;
8503 8521 }
8504 8522 if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8505 8523 fp->rf_deny_read--;
8506 8524 if (fp->rf_deny_read == 0)
8507 8525 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8508 8526 }
8509 8527 if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8510 8528 fp->rf_deny_write--;
8511 8529 if (fp->rf_deny_write == 0)
8512 8530 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8513 8531 }
8514 8532
8515 8533 (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8516 8534
8517 8535 rfs4_dbe_unlock(fp->rf_dbe);
8518 8536
8519 8537 sp->rs_open_access = 0;
8520 8538 sp->rs_open_deny = 0;
8521 8539 }
8522 8540 }
8523 8541
8524 8542 /*
8525 8543 * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8526 8544 */
8527 8545 static nfsstat4
8528 8546 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8529 8547 {
8530 8548 rfs4_lockowner_t *lo;
8531 8549 rfs4_client_t *cp;
8532 8550 uint32_t len;
8533 8551
8534 8552 lo = rfs4_findlockowner_by_pid(flk->l_pid);
8535 8553 if (lo != NULL) {
8536 8554 cp = lo->rl_client;
8537 8555 if (rfs4_lease_expired(cp)) {
8538 8556 rfs4_lockowner_rele(lo);
8539 8557 rfs4_dbe_hold(cp->rc_dbe);
8540 8558 rfs4_client_close(cp);
8541 8559 return (NFS4ERR_EXPIRED);
8542 8560 }
8543 8561 dp->owner.clientid = lo->rl_owner.clientid;
8544 8562 len = lo->rl_owner.owner_len;
8545 8563 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8546 8564 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8547 8565 dp->owner.owner_len = len;
8548 8566 rfs4_lockowner_rele(lo);
8549 8567 goto finish;
8550 8568 }
8551 8569
8552 8570 /*
8553 8571 * Its not a NFS4 lock. We take advantage that the upper 32 bits
8554 8572 * of the client id contain the boot time for a NFS4 lock. So we
8555 8573 * fabricate and identity by setting clientid to the sysid, and
8556 8574 * the lock owner to the pid.
8557 8575 */
8558 8576 dp->owner.clientid = flk->l_sysid;
8559 8577 len = sizeof (pid_t);
8560 8578 dp->owner.owner_len = len;
8561 8579 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8562 8580 bcopy(&flk->l_pid, dp->owner.owner_val, len);
8563 8581 finish:
8564 8582 dp->offset = flk->l_start;
8565 8583 dp->length = flk->l_len;
8566 8584
8567 8585 if (flk->l_type == F_RDLCK)
8568 8586 dp->locktype = READ_LT;
8569 8587 else if (flk->l_type == F_WRLCK)
8570 8588 dp->locktype = WRITE_LT;
8571 8589 else
8572 8590 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8573 8591
8574 8592 return (NFS4_OK);
8575 8593 }
8576 8594
8577 8595 /*
8578 8596 * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8579 8597 * NFSv4.0 protocol level) so the client needs to resend the LOCK request in
8580 8598 * case the lock is denied by the NFSv4.0 server. NFSv4.0 clients are prepared
8581 8599 * for that (obviously); they are sending the LOCK requests with some delays
8582 8600 * between the attempts. See nfs4frlock() and nfs4_block_and_wait() for the
8583 8601 * locking and delay implementation at the client side.
8584 8602 *
8585 8603 * To make the life of the clients easier, the NFSv4.0 server tries to do some
8586 8604 * fast retries on its own (the for loop below) in the hope the lock will be
8587 8605 * available soon. And if not, the client won't need to resend the LOCK
8588 8606 * requests so fast to check the lock availability. This basically saves some
8589 8607 * network traffic and tries to make sure the client gets the lock ASAP.
8590 8608 */
8591 8609 static int
8592 8610 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8593 8611 {
8594 8612 int error;
8595 8613 struct flock64 flk;
8596 8614 int i;
8597 8615 clock_t delaytime;
8598 8616 int cmd;
8599 8617 int spin_cnt = 0;
8600 8618
8601 8619 cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8602 8620 retry:
8603 8621 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8604 8622
8605 8623 for (i = 0; i < rfs4_maxlock_tries; i++) {
8606 8624 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8607 8625 error = VOP_FRLOCK(vp, cmd,
8608 8626 flock, flag, (u_offset_t)0, NULL, cred, NULL);
8609 8627
8610 8628 if (error != EAGAIN && error != EACCES)
8611 8629 break;
8612 8630
8613 8631 if (i < rfs4_maxlock_tries - 1) {
8614 8632 delay(delaytime);
8615 8633 delaytime *= 2;
8616 8634 }
8617 8635 }
8618 8636
8619 8637 if (error == EAGAIN || error == EACCES) {
8620 8638 /* Get the owner of the lock */
8621 8639 flk = *flock;
8622 8640 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8623 8641 if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8624 8642 NULL) == 0) {
8625 8643 /*
8626 8644 * There's a race inherent in the current VOP_FRLOCK
8627 8645 * design where:
8628 8646 * a: "other guy" takes a lock that conflicts with a
8629 8647 * lock we want
8630 8648 * b: we attempt to take our lock (non-blocking) and
8631 8649 * the attempt fails.
8632 8650 * c: "other guy" releases the conflicting lock
8633 8651 * d: we ask what lock conflicts with the lock we want,
8634 8652 * getting F_UNLCK (no lock blocks us)
8635 8653 *
8636 8654 * If we retry the non-blocking lock attempt in this
8637 8655 * case (restart at step 'b') there's some possibility
8638 8656 * that many such attempts might fail. However a test
8639 8657 * designed to actually provoke this race shows that
8640 8658 * the vast majority of cases require no retry, and
8641 8659 * only a few took as many as three retries. Here's
8642 8660 * the test outcome:
8643 8661 *
8644 8662 * number of retries how many times we needed
8645 8663 * that many retries
8646 8664 * 0 79461
8647 8665 * 1 862
8648 8666 * 2 49
8649 8667 * 3 5
8650 8668 *
8651 8669 * Given those empirical results, we arbitrarily limit
8652 8670 * the retry count to ten.
8653 8671 *
8654 8672 * If we actually make it to ten retries and give up,
8655 8673 * nothing catastrophic happens, but we're unable to
8656 8674 * return the information about the conflicting lock to
8657 8675 * the NFS client. That's an acceptable trade off vs.
8658 8676 * letting this retry loop run forever.
8659 8677 */
8660 8678 if (flk.l_type == F_UNLCK) {
8661 8679 if (spin_cnt++ < 10) {
8662 8680 /* No longer locked, retry */
8663 8681 goto retry;
8664 8682 }
8665 8683 } else {
8666 8684 *flock = flk;
8667 8685 LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8668 8686 F_GETLK, &flk);
8669 8687 }
8670 8688 }
8671 8689 }
8672 8690
8673 8691 return (error);
8674 8692 }
8675 8693
8676 8694 /*ARGSUSED*/
8677 8695 static nfsstat4
8678 8696 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8679 8697 offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8680 8698 {
8681 8699 nfsstat4 status;
8682 8700 rfs4_lockowner_t *lo = lsp->rls_locker;
8683 8701 rfs4_state_t *sp = lsp->rls_state;
8684 8702 struct flock64 flock;
8685 8703 int16_t ltype;
8686 8704 int flag;
8687 8705 int error;
8688 8706 sysid_t sysid;
8689 8707 LOCK4res *lres;
8690 8708 vnode_t *vp;
8691 8709
8692 8710 if (rfs4_lease_expired(lo->rl_client)) {
8693 8711 return (NFS4ERR_EXPIRED);
8694 8712 }
8695 8713
8696 8714 if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8697 8715 return (status);
8698 8716
8699 8717 /* Check for zero length. To lock to end of file use all ones for V4 */
8700 8718 if (length == 0)
8701 8719 return (NFS4ERR_INVAL);
8702 8720 else if (length == (length4)(~0))
8703 8721 length = 0; /* Posix to end of file */
8704 8722
8705 8723 retry:
8706 8724 rfs4_dbe_lock(sp->rs_dbe);
8707 8725 if (sp->rs_closed == TRUE) {
8708 8726 rfs4_dbe_unlock(sp->rs_dbe);
8709 8727 return (NFS4ERR_OLD_STATEID);
8710 8728 }
8711 8729
8712 8730 if (resop->resop != OP_LOCKU) {
8713 8731 switch (locktype) {
8714 8732 case READ_LT:
8715 8733 case READW_LT:
8716 8734 if ((sp->rs_share_access
8717 8735 & OPEN4_SHARE_ACCESS_READ) == 0) {
8718 8736 rfs4_dbe_unlock(sp->rs_dbe);
8719 8737
8720 8738 return (NFS4ERR_OPENMODE);
8721 8739 }
8722 8740 ltype = F_RDLCK;
8723 8741 break;
8724 8742 case WRITE_LT:
8725 8743 case WRITEW_LT:
8726 8744 if ((sp->rs_share_access
8727 8745 & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8728 8746 rfs4_dbe_unlock(sp->rs_dbe);
8729 8747
8730 8748 return (NFS4ERR_OPENMODE);
8731 8749 }
8732 8750 ltype = F_WRLCK;
8733 8751 break;
8734 8752 }
8735 8753 } else
8736 8754 ltype = F_UNLCK;
8737 8755
8738 8756 flock.l_type = ltype;
8739 8757 flock.l_whence = 0; /* SEEK_SET */
8740 8758 flock.l_start = offset;
8741 8759 flock.l_len = length;
8742 8760 flock.l_sysid = sysid;
8743 8761 flock.l_pid = lsp->rls_locker->rl_pid;
8744 8762
8745 8763 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8746 8764 if (flock.l_len < 0 || flock.l_start < 0) {
8747 8765 rfs4_dbe_unlock(sp->rs_dbe);
8748 8766 return (NFS4ERR_INVAL);
8749 8767 }
8750 8768
8751 8769 /*
8752 8770 * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
8753 8771 * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
8754 8772 */
8755 8773 flag = (int)sp->rs_share_access | F_REMOTELOCK;
8756 8774
8757 8775 vp = sp->rs_finfo->rf_vp;
8758 8776 VN_HOLD(vp);
8759 8777
8760 8778 /*
8761 8779 * We need to unlock sp before we call the underlying filesystem to
8762 8780 * acquire the file lock.
8763 8781 */
8764 8782 rfs4_dbe_unlock(sp->rs_dbe);
8765 8783
8766 8784 error = setlock(vp, &flock, flag, cred);
8767 8785
8768 8786 /*
8769 8787 * Make sure the file is still open. In case the file was closed in
8770 8788 * the meantime, clean the lock we acquired using the setlock() call
8771 8789 * above, and return the appropriate error.
8772 8790 */
8773 8791 rfs4_dbe_lock(sp->rs_dbe);
8774 8792 if (sp->rs_closed == TRUE) {
8775 8793 cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
8776 8794 rfs4_dbe_unlock(sp->rs_dbe);
8777 8795
8778 8796 VN_RELE(vp);
8779 8797
8780 8798 return (NFS4ERR_OLD_STATEID);
8781 8799 }
8782 8800 rfs4_dbe_unlock(sp->rs_dbe);
8783 8801
8784 8802 VN_RELE(vp);
8785 8803
8786 8804 if (error == 0) {
8787 8805 rfs4_dbe_lock(lsp->rls_dbe);
8788 8806 next_stateid(&lsp->rls_lockid);
8789 8807 rfs4_dbe_unlock(lsp->rls_dbe);
8790 8808 }
8791 8809
8792 8810 /*
8793 8811 * N.B. We map error values to nfsv4 errors. This is different
8794 8812 * from the puterrno4 routine.
8795 8813 */
8796 8814 switch (error) {
8797 8815 case 0:
8798 8816 status = NFS4_OK;
8799 8817 break;
8800 8818 case EAGAIN:
8801 8819 case EACCES: /* Old value */
8802 8820 /* Can only get here if op is OP_LOCK */
8803 8821 ASSERT(resop->resop == OP_LOCK);
8804 8822 lres = &resop->nfs_resop4_u.oplock;
8805 8823 status = NFS4ERR_DENIED;
8806 8824 if (lock_denied(&lres->LOCK4res_u.denied, &flock)
8807 8825 == NFS4ERR_EXPIRED)
8808 8826 goto retry;
8809 8827 break;
8810 8828 case ENOLCK:
8811 8829 status = NFS4ERR_DELAY;
8812 8830 break;
8813 8831 case EOVERFLOW:
8814 8832 status = NFS4ERR_INVAL;
8815 8833 break;
8816 8834 case EINVAL:
8817 8835 status = NFS4ERR_NOTSUPP;
8818 8836 break;
8819 8837 default:
8820 8838 status = NFS4ERR_SERVERFAULT;
8821 8839 break;
8822 8840 }
8823 8841
8824 8842 return (status);
8825 8843 }
8826 8844
8827 8845 /*ARGSUSED*/
8828 8846 void
8829 8847 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
8830 8848 struct svc_req *req, struct compound_state *cs)
8831 8849 {
8832 8850 LOCK4args *args = &argop->nfs_argop4_u.oplock;
8833 8851 LOCK4res *resp = &resop->nfs_resop4_u.oplock;
8834 8852 nfsstat4 status;
8835 8853 stateid4 *stateid;
8836 8854 rfs4_lockowner_t *lo;
8837 8855 rfs4_client_t *cp;
8838 8856 rfs4_state_t *sp = NULL;
8839 8857 rfs4_lo_state_t *lsp = NULL;
8840 8858 bool_t ls_sw_held = FALSE;
8841 8859 bool_t create = TRUE;
8842 8860 bool_t lcreate = TRUE;
8843 8861 bool_t dup_lock = FALSE;
8844 8862 int rc;
8845 8863
8846 8864 DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
8847 8865 LOCK4args *, args);
8848 8866
8849 8867 if (cs->vp == NULL) {
8850 8868 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8851 8869 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8852 8870 cs, LOCK4res *, resp);
8853 8871 return;
8854 8872 }
8855 8873
8856 8874 if (args->locker.new_lock_owner) {
8857 8875 /* Create a new lockowner for this instance */
8858 8876 open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
8859 8877
8860 8878 NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
8861 8879
8862 8880 stateid = &olo->open_stateid;
8863 8881 status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
8864 8882 if (status != NFS4_OK) {
8865 8883 NFS4_DEBUG(rfs4_debug,
8866 8884 (CE_NOTE, "Get state failed in lock %d", status));
8867 8885 *cs->statusp = resp->status = status;
8868 8886 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8869 8887 cs, LOCK4res *, resp);
8870 8888 return;
8871 8889 }
8872 8890
8873 8891 /* Ensure specified filehandle matches */
8874 8892 if (cs->vp != sp->rs_finfo->rf_vp) {
8875 8893 rfs4_state_rele(sp);
8876 8894 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8877 8895 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8878 8896 cs, LOCK4res *, resp);
8879 8897 return;
8880 8898 }
8881 8899
8882 8900 /* hold off other access to open_owner while we tinker */
8883 8901 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8884 8902
8885 8903 switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
8886 8904 case NFS4_CHECK_STATEID_OLD:
8887 8905 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8888 8906 goto end;
8889 8907 case NFS4_CHECK_STATEID_BAD:
8890 8908 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8891 8909 goto end;
8892 8910 case NFS4_CHECK_STATEID_EXPIRED:
8893 8911 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8894 8912 goto end;
8895 8913 case NFS4_CHECK_STATEID_UNCONFIRMED:
8896 8914 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8897 8915 goto end;
8898 8916 case NFS4_CHECK_STATEID_CLOSED:
8899 8917 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8900 8918 goto end;
8901 8919 case NFS4_CHECK_STATEID_OKAY:
8902 8920 case NFS4_CHECK_STATEID_REPLAY:
8903 8921 switch (rfs4_check_olo_seqid(olo->open_seqid,
8904 8922 sp->rs_owner, resop)) {
8905 8923 case NFS4_CHKSEQ_OKAY:
8906 8924 if (rc == NFS4_CHECK_STATEID_OKAY)
8907 8925 break;
8908 8926 /*
8909 8927 * This is replayed stateid; if seqid
8910 8928 * matches next expected, then client
8911 8929 * is using wrong seqid.
8912 8930 */
8913 8931 /* FALLTHROUGH */
8914 8932 case NFS4_CHKSEQ_BAD:
8915 8933 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8916 8934 goto end;
8917 8935 case NFS4_CHKSEQ_REPLAY:
8918 8936 /* This is a duplicate LOCK request */
8919 8937 dup_lock = TRUE;
8920 8938
8921 8939 /*
8922 8940 * For a duplicate we do not want to
8923 8941 * create a new lockowner as it should
8924 8942 * already exist.
8925 8943 * Turn off the lockowner create flag.
8926 8944 */
8927 8945 lcreate = FALSE;
8928 8946 }
8929 8947 break;
8930 8948 }
8931 8949
8932 8950 lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
8933 8951 if (lo == NULL) {
8934 8952 NFS4_DEBUG(rfs4_debug,
8935 8953 (CE_NOTE, "rfs4_op_lock: no lock owner"));
8936 8954 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
8937 8955 goto end;
8938 8956 }
8939 8957
8940 8958 lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
8941 8959 if (lsp == NULL) {
8942 8960 rfs4_update_lease(sp->rs_owner->ro_client);
8943 8961 /*
8944 8962 * Only update the open_seqid if this is not
8945 8963 * a duplicate request
8946 8964 */
8947 8965 if (dup_lock == FALSE) {
8948 8966 rfs4_update_open_sequence(sp->rs_owner);
8949 8967 }
8950 8968
8951 8969 NFS4_DEBUG(rfs4_debug,
8952 8970 (CE_NOTE, "rfs4_op_lock: no state"));
8953 8971 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8954 8972 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8955 8973 rfs4_lockowner_rele(lo);
8956 8974 goto end;
8957 8975 }
8958 8976
8959 8977 /*
8960 8978 * This is the new_lock_owner branch and the client is
8961 8979 * supposed to be associating a new lock_owner with
8962 8980 * the open file at this point. If we find that a
8963 8981 * lock_owner/state association already exists and a
8964 8982 * successful LOCK request was returned to the client,
8965 8983 * an error is returned to the client since this is
8966 8984 * not appropriate. The client should be using the
8967 8985 * existing lock_owner branch.
8968 8986 */
8969 8987 if (dup_lock == FALSE && create == FALSE) {
8970 8988 if (lsp->rls_lock_completed == TRUE) {
8971 8989 *cs->statusp =
8972 8990 resp->status = NFS4ERR_BAD_SEQID;
8973 8991 rfs4_lockowner_rele(lo);
8974 8992 goto end;
8975 8993 }
8976 8994 }
8977 8995
8978 8996 rfs4_update_lease(sp->rs_owner->ro_client);
8979 8997
8980 8998 /*
8981 8999 * Only update the open_seqid if this is not
8982 9000 * a duplicate request
8983 9001 */
8984 9002 if (dup_lock == FALSE) {
8985 9003 rfs4_update_open_sequence(sp->rs_owner);
8986 9004 }
8987 9005
8988 9006 /*
8989 9007 * If this is a duplicate lock request, just copy the
8990 9008 * previously saved reply and return.
8991 9009 */
8992 9010 if (dup_lock == TRUE) {
8993 9011 /* verify that lock_seqid's match */
8994 9012 if (lsp->rls_seqid != olo->lock_seqid) {
8995 9013 NFS4_DEBUG(rfs4_debug,
8996 9014 (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
8997 9015 "lsp->seqid=%d old->seqid=%d",
8998 9016 lsp->rls_seqid, olo->lock_seqid));
8999 9017 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9000 9018 } else {
9001 9019 rfs4_copy_reply(resop, &lsp->rls_reply);
9002 9020 /*
9003 9021 * Make sure to copy the just
9004 9022 * retrieved reply status into the
9005 9023 * overall compound status
9006 9024 */
9007 9025 *cs->statusp = resp->status;
9008 9026 }
9009 9027 rfs4_lockowner_rele(lo);
9010 9028 goto end;
9011 9029 }
9012 9030
9013 9031 rfs4_dbe_lock(lsp->rls_dbe);
9014 9032
9015 9033 /* Make sure to update the lock sequence id */
9016 9034 lsp->rls_seqid = olo->lock_seqid;
9017 9035
9018 9036 NFS4_DEBUG(rfs4_debug,
9019 9037 (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
9020 9038
9021 9039 /*
9022 9040 * This is used to signify the newly created lockowner
9023 9041 * stateid and its sequence number. The checks for
9024 9042 * sequence number and increment don't occur on the
9025 9043 * very first lock request for a lockowner.
9026 9044 */
9027 9045 lsp->rls_skip_seqid_check = TRUE;
9028 9046
9029 9047 /* hold off other access to lsp while we tinker */
9030 9048 rfs4_sw_enter(&lsp->rls_sw);
9031 9049 ls_sw_held = TRUE;
9032 9050
9033 9051 rfs4_dbe_unlock(lsp->rls_dbe);
9034 9052
9035 9053 rfs4_lockowner_rele(lo);
9036 9054 } else {
9037 9055 stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
9038 9056 /* get lsp and hold the lock on the underlying file struct */
9039 9057 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
9040 9058 != NFS4_OK) {
9041 9059 *cs->statusp = resp->status = status;
9042 9060 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9043 9061 cs, LOCK4res *, resp);
9044 9062 return;
9045 9063 }
9046 9064 create = FALSE; /* We didn't create lsp */
9047 9065
9048 9066 /* Ensure specified filehandle matches */
9049 9067 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9050 9068 rfs4_lo_state_rele(lsp, TRUE);
9051 9069 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9052 9070 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9053 9071 cs, LOCK4res *, resp);
9054 9072 return;
9055 9073 }
9056 9074
9057 9075 /* hold off other access to lsp while we tinker */
9058 9076 rfs4_sw_enter(&lsp->rls_sw);
9059 9077 ls_sw_held = TRUE;
9060 9078
9061 9079 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9062 9080 /*
9063 9081 * The stateid looks like it was okay (expected to be
9064 9082 * the next one)
9065 9083 */
9066 9084 case NFS4_CHECK_STATEID_OKAY:
9067 9085 /*
9068 9086 * The sequence id is now checked. Determine
9069 9087 * if this is a replay or if it is in the
9070 9088 * expected (next) sequence. In the case of a
9071 9089 * replay, there are two replay conditions
9072 9090 * that may occur. The first is the normal
9073 9091 * condition where a LOCK is done with a
9074 9092 * NFS4_OK response and the stateid is
9075 9093 * updated. That case is handled below when
9076 9094 * the stateid is identified as a REPLAY. The
9077 9095 * second is the case where an error is
9078 9096 * returned, like NFS4ERR_DENIED, and the
9079 9097 * sequence number is updated but the stateid
9080 9098 * is not updated. This second case is dealt
9081 9099 * with here. So it may seem odd that the
9082 9100 * stateid is okay but the sequence id is a
9083 9101 * replay but it is okay.
9084 9102 */
9085 9103 switch (rfs4_check_lock_seqid(
9086 9104 args->locker.locker4_u.lock_owner.lock_seqid,
9087 9105 lsp, resop)) {
9088 9106 case NFS4_CHKSEQ_REPLAY:
9089 9107 if (resp->status != NFS4_OK) {
9090 9108 /*
9091 9109 * Here is our replay and need
9092 9110 * to verify that the last
9093 9111 * response was an error.
9094 9112 */
9095 9113 *cs->statusp = resp->status;
9096 9114 goto end;
9097 9115 }
9098 9116 /*
9099 9117 * This is done since the sequence id
9100 9118 * looked like a replay but it didn't
9101 9119 * pass our check so a BAD_SEQID is
9102 9120 * returned as a result.
9103 9121 */
9104 9122 /*FALLTHROUGH*/
9105 9123 case NFS4_CHKSEQ_BAD:
9106 9124 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9107 9125 goto end;
9108 9126 case NFS4_CHKSEQ_OKAY:
9109 9127 /* Everything looks okay move ahead */
9110 9128 break;
9111 9129 }
9112 9130 break;
9113 9131 case NFS4_CHECK_STATEID_OLD:
9114 9132 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9115 9133 goto end;
9116 9134 case NFS4_CHECK_STATEID_BAD:
9117 9135 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9118 9136 goto end;
9119 9137 case NFS4_CHECK_STATEID_EXPIRED:
9120 9138 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9121 9139 goto end;
9122 9140 case NFS4_CHECK_STATEID_CLOSED:
9123 9141 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9124 9142 goto end;
9125 9143 case NFS4_CHECK_STATEID_REPLAY:
9126 9144 switch (rfs4_check_lock_seqid(
9127 9145 args->locker.locker4_u.lock_owner.lock_seqid,
9128 9146 lsp, resop)) {
9129 9147 case NFS4_CHKSEQ_OKAY:
9130 9148 /*
9131 9149 * This is a replayed stateid; if
9132 9150 * seqid matches the next expected,
9133 9151 * then client is using wrong seqid.
9134 9152 */
9135 9153 case NFS4_CHKSEQ_BAD:
9136 9154 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9137 9155 goto end;
9138 9156 case NFS4_CHKSEQ_REPLAY:
9139 9157 rfs4_update_lease(lsp->rls_locker->rl_client);
9140 9158 *cs->statusp = status = resp->status;
9141 9159 goto end;
9142 9160 }
9143 9161 break;
9144 9162 default:
9145 9163 ASSERT(FALSE);
9146 9164 break;
9147 9165 }
9148 9166
9149 9167 rfs4_update_lock_sequence(lsp);
9150 9168 rfs4_update_lease(lsp->rls_locker->rl_client);
9151 9169 }
9152 9170
9153 9171 /*
9154 9172 * NFS4 only allows locking on regular files, so
9155 9173 * verify type of object.
9156 9174 */
9157 9175 if (cs->vp->v_type != VREG) {
9158 9176 if (cs->vp->v_type == VDIR)
9159 9177 status = NFS4ERR_ISDIR;
9160 9178 else
9161 9179 status = NFS4ERR_INVAL;
9162 9180 goto out;
9163 9181 }
9164 9182
9165 9183 cp = lsp->rls_state->rs_owner->ro_client;
9166 9184
9167 9185 if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9168 9186 status = NFS4ERR_GRACE;
9169 9187 goto out;
9170 9188 }
9171 9189
9172 9190 if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9173 9191 status = NFS4ERR_NO_GRACE;
9174 9192 goto out;
9175 9193 }
9176 9194
9177 9195 if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9178 9196 status = NFS4ERR_NO_GRACE;
9179 9197 goto out;
9180 9198 }
9181 9199
9182 9200 if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9183 9201 cs->deleg = TRUE;
9184 9202
9185 9203 status = rfs4_do_lock(lsp, args->locktype,
9186 9204 args->offset, args->length, cs->cr, resop);
9187 9205
9188 9206 out:
9189 9207 lsp->rls_skip_seqid_check = FALSE;
9190 9208
9191 9209 *cs->statusp = resp->status = status;
9192 9210
9193 9211 if (status == NFS4_OK) {
9194 9212 resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9195 9213 lsp->rls_lock_completed = TRUE;
9196 9214 }
9197 9215 /*
9198 9216 * Only update the "OPEN" response here if this was a new
9199 9217 * lock_owner
9200 9218 */
9201 9219 if (sp)
9202 9220 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9203 9221
9204 9222 rfs4_update_lock_resp(lsp, resop);
9205 9223
9206 9224 end:
9207 9225 if (lsp) {
9208 9226 if (ls_sw_held)
9209 9227 rfs4_sw_exit(&lsp->rls_sw);
9210 9228 /*
9211 9229 * If an sp obtained, then the lsp does not represent
9212 9230 * a lock on the file struct.
9213 9231 */
9214 9232 if (sp != NULL)
9215 9233 rfs4_lo_state_rele(lsp, FALSE);
9216 9234 else
9217 9235 rfs4_lo_state_rele(lsp, TRUE);
9218 9236 }
9219 9237 if (sp) {
9220 9238 rfs4_sw_exit(&sp->rs_owner->ro_sw);
9221 9239 rfs4_state_rele(sp);
9222 9240 }
9223 9241
9224 9242 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9225 9243 LOCK4res *, resp);
9226 9244 }
9227 9245
9228 9246 /* free function for LOCK/LOCKT */
9229 9247 static void
9230 9248 lock_denied_free(nfs_resop4 *resop)
9231 9249 {
9232 9250 LOCK4denied *dp = NULL;
9233 9251
9234 9252 switch (resop->resop) {
9235 9253 case OP_LOCK:
9236 9254 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9237 9255 dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9238 9256 break;
9239 9257 case OP_LOCKT:
9240 9258 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9241 9259 dp = &resop->nfs_resop4_u.oplockt.denied;
9242 9260 break;
9243 9261 default:
9244 9262 break;
9245 9263 }
9246 9264
9247 9265 if (dp)
9248 9266 kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9249 9267 }
9250 9268
9251 9269 /*ARGSUSED*/
9252 9270 void
9253 9271 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
9254 9272 struct svc_req *req, struct compound_state *cs)
9255 9273 {
9256 9274 LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9257 9275 LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9258 9276 nfsstat4 status;
9259 9277 stateid4 *stateid = &args->lock_stateid;
9260 9278 rfs4_lo_state_t *lsp;
9261 9279
9262 9280 DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9263 9281 LOCKU4args *, args);
9264 9282
9265 9283 if (cs->vp == NULL) {
9266 9284 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9267 9285 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9268 9286 LOCKU4res *, resp);
9269 9287 return;
9270 9288 }
9271 9289
9272 9290 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9273 9291 *cs->statusp = resp->status = status;
9274 9292 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9275 9293 LOCKU4res *, resp);
9276 9294 return;
9277 9295 }
9278 9296
9279 9297 /* Ensure specified filehandle matches */
9280 9298 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9281 9299 rfs4_lo_state_rele(lsp, TRUE);
9282 9300 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9283 9301 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9284 9302 LOCKU4res *, resp);
9285 9303 return;
9286 9304 }
9287 9305
9288 9306 /* hold off other access to lsp while we tinker */
9289 9307 rfs4_sw_enter(&lsp->rls_sw);
9290 9308
9291 9309 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9292 9310 case NFS4_CHECK_STATEID_OKAY:
9293 9311 if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9294 9312 != NFS4_CHKSEQ_OKAY) {
9295 9313 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9296 9314 goto end;
9297 9315 }
9298 9316 break;
9299 9317 case NFS4_CHECK_STATEID_OLD:
9300 9318 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9301 9319 goto end;
9302 9320 case NFS4_CHECK_STATEID_BAD:
9303 9321 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9304 9322 goto end;
9305 9323 case NFS4_CHECK_STATEID_EXPIRED:
9306 9324 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9307 9325 goto end;
9308 9326 case NFS4_CHECK_STATEID_CLOSED:
9309 9327 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9310 9328 goto end;
9311 9329 case NFS4_CHECK_STATEID_REPLAY:
9312 9330 switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9313 9331 case NFS4_CHKSEQ_OKAY:
9314 9332 /*
9315 9333 * This is a replayed stateid; if
9316 9334 * seqid matches the next expected,
9317 9335 * then client is using wrong seqid.
9318 9336 */
9319 9337 case NFS4_CHKSEQ_BAD:
9320 9338 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9321 9339 goto end;
9322 9340 case NFS4_CHKSEQ_REPLAY:
9323 9341 rfs4_update_lease(lsp->rls_locker->rl_client);
9324 9342 *cs->statusp = status = resp->status;
9325 9343 goto end;
9326 9344 }
9327 9345 break;
9328 9346 default:
9329 9347 ASSERT(FALSE);
9330 9348 break;
9331 9349 }
9332 9350
9333 9351 rfs4_update_lock_sequence(lsp);
9334 9352 rfs4_update_lease(lsp->rls_locker->rl_client);
9335 9353
9336 9354 /*
9337 9355 * NFS4 only allows locking on regular files, so
9338 9356 * verify type of object.
9339 9357 */
9340 9358 if (cs->vp->v_type != VREG) {
9341 9359 if (cs->vp->v_type == VDIR)
9342 9360 status = NFS4ERR_ISDIR;
9343 9361 else
9344 9362 status = NFS4ERR_INVAL;
9345 9363 goto out;
9346 9364 }
9347 9365
9348 9366 if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9349 9367 status = NFS4ERR_GRACE;
9350 9368 goto out;
9351 9369 }
9352 9370
9353 9371 status = rfs4_do_lock(lsp, args->locktype,
9354 9372 args->offset, args->length, cs->cr, resop);
9355 9373
9356 9374 out:
9357 9375 *cs->statusp = resp->status = status;
9358 9376
9359 9377 if (status == NFS4_OK)
9360 9378 resp->lock_stateid = lsp->rls_lockid.stateid;
9361 9379
9362 9380 rfs4_update_lock_resp(lsp, resop);
9363 9381
9364 9382 end:
9365 9383 rfs4_sw_exit(&lsp->rls_sw);
9366 9384 rfs4_lo_state_rele(lsp, TRUE);
9367 9385
9368 9386 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9369 9387 LOCKU4res *, resp);
9370 9388 }
9371 9389
9372 9390 /*
9373 9391 * LOCKT is a best effort routine, the client can not be guaranteed that
9374 9392 * the status return is still in effect by the time the reply is received.
9375 9393 * There are numerous race conditions in this routine, but we are not required
9376 9394 * to be, and can not be, accurate.
9377 9395 */
9378 9396 /*ARGSUSED*/
9379 9397 void
9380 9398 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9381 9399 struct svc_req *req, struct compound_state *cs)
9382 9400 {
9383 9401 LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9384 9402 LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9385 9403 rfs4_lockowner_t *lo;
9386 9404 rfs4_client_t *cp;
9387 9405 bool_t create = FALSE;
9388 9406 struct flock64 flk;
9389 9407 int error;
9390 9408 int flag = FREAD | FWRITE;
9391 9409 int ltype;
9392 9410 length4 posix_length;
9393 9411 sysid_t sysid;
9394 9412 pid_t pid;
9395 9413
9396 9414 DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9397 9415 LOCKT4args *, args);
9398 9416
9399 9417 if (cs->vp == NULL) {
9400 9418 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9401 9419 goto out;
9402 9420 }
9403 9421
9404 9422 /*
9405 9423 * NFS4 only allows locking on regular files, so
9406 9424 * verify type of object.
9407 9425 */
9408 9426 if (cs->vp->v_type != VREG) {
9409 9427 if (cs->vp->v_type == VDIR)
9410 9428 *cs->statusp = resp->status = NFS4ERR_ISDIR;
9411 9429 else
9412 9430 *cs->statusp = resp->status = NFS4ERR_INVAL;
9413 9431 goto out;
9414 9432 }
9415 9433
9416 9434 /*
9417 9435 * Check out the clientid to ensure the server knows about it
9418 9436 * so that we correctly inform the client of a server reboot.
9419 9437 */
9420 9438 if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9421 9439 == NULL) {
9422 9440 *cs->statusp = resp->status =
9423 9441 rfs4_check_clientid(&args->owner.clientid, 0);
9424 9442 goto out;
9425 9443 }
9426 9444 if (rfs4_lease_expired(cp)) {
9427 9445 rfs4_client_close(cp);
9428 9446 /*
9429 9447 * Protocol doesn't allow returning NFS4ERR_STALE as
9430 9448 * other operations do on this check so STALE_CLIENTID
9431 9449 * is returned instead
9432 9450 */
9433 9451 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9434 9452 goto out;
9435 9453 }
9436 9454
9437 9455 if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9438 9456 *cs->statusp = resp->status = NFS4ERR_GRACE;
9439 9457 rfs4_client_rele(cp);
9440 9458 goto out;
9441 9459 }
9442 9460 rfs4_client_rele(cp);
9443 9461
9444 9462 resp->status = NFS4_OK;
9445 9463
9446 9464 switch (args->locktype) {
9447 9465 case READ_LT:
9448 9466 case READW_LT:
9449 9467 ltype = F_RDLCK;
9450 9468 break;
9451 9469 case WRITE_LT:
9452 9470 case WRITEW_LT:
9453 9471 ltype = F_WRLCK;
9454 9472 break;
9455 9473 }
9456 9474
9457 9475 posix_length = args->length;
9458 9476 /* Check for zero length. To lock to end of file use all ones for V4 */
9459 9477 if (posix_length == 0) {
9460 9478 *cs->statusp = resp->status = NFS4ERR_INVAL;
9461 9479 goto out;
9462 9480 } else if (posix_length == (length4)(~0)) {
9463 9481 posix_length = 0; /* Posix to end of file */
9464 9482 }
9465 9483
9466 9484 /* Find or create a lockowner */
9467 9485 lo = rfs4_findlockowner(&args->owner, &create);
9468 9486
9469 9487 if (lo) {
9470 9488 pid = lo->rl_pid;
9471 9489 if ((resp->status =
9472 9490 rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9473 9491 goto err;
9474 9492 } else {
9475 9493 pid = 0;
9476 9494 sysid = lockt_sysid;
9477 9495 }
9478 9496 retry:
9479 9497 flk.l_type = ltype;
9480 9498 flk.l_whence = 0; /* SEEK_SET */
9481 9499 flk.l_start = args->offset;
9482 9500 flk.l_len = posix_length;
9483 9501 flk.l_sysid = sysid;
9484 9502 flk.l_pid = pid;
9485 9503 flag |= F_REMOTELOCK;
9486 9504
9487 9505 LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9488 9506
9489 9507 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9490 9508 if (flk.l_len < 0 || flk.l_start < 0) {
9491 9509 resp->status = NFS4ERR_INVAL;
9492 9510 goto err;
9493 9511 }
9494 9512 error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9495 9513 NULL, cs->cr, NULL);
9496 9514
9497 9515 /*
9498 9516 * N.B. We map error values to nfsv4 errors. This is different
9499 9517 * from the puterrno4 routine.
9500 9518 */
9501 9519 switch (error) {
9502 9520 case 0:
9503 9521 if (flk.l_type == F_UNLCK)
9504 9522 resp->status = NFS4_OK;
9505 9523 else {
9506 9524 if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9507 9525 goto retry;
9508 9526 resp->status = NFS4ERR_DENIED;
9509 9527 }
9510 9528 break;
9511 9529 case EOVERFLOW:
9512 9530 resp->status = NFS4ERR_INVAL;
9513 9531 break;
9514 9532 case EINVAL:
9515 9533 resp->status = NFS4ERR_NOTSUPP;
9516 9534 break;
9517 9535 default:
9518 9536 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9519 9537 error);
9520 9538 resp->status = NFS4ERR_SERVERFAULT;
9521 9539 break;
9522 9540 }
9523 9541
9524 9542 err:
9525 9543 if (lo)
9526 9544 rfs4_lockowner_rele(lo);
9527 9545 *cs->statusp = resp->status;
9528 9546 out:
9529 9547 DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9530 9548 LOCKT4res *, resp);
9531 9549 }
9532 9550
9533 9551 int
9534 9552 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9535 9553 {
9536 9554 int err;
9537 9555 int cmd;
9538 9556 vnode_t *vp;
9539 9557 struct shrlock shr;
9540 9558 struct shr_locowner shr_loco;
9541 9559 int fflags = 0;
9542 9560
9543 9561 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9544 9562 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9545 9563
9546 9564 if (sp->rs_closed)
9547 9565 return (NFS4ERR_OLD_STATEID);
9548 9566
9549 9567 vp = sp->rs_finfo->rf_vp;
9550 9568 ASSERT(vp);
9551 9569
9552 9570 shr.s_access = shr.s_deny = 0;
9553 9571
9554 9572 if (access & OPEN4_SHARE_ACCESS_READ) {
9555 9573 fflags |= FREAD;
9556 9574 shr.s_access |= F_RDACC;
9557 9575 }
9558 9576 if (access & OPEN4_SHARE_ACCESS_WRITE) {
9559 9577 fflags |= FWRITE;
9560 9578 shr.s_access |= F_WRACC;
9561 9579 }
9562 9580 ASSERT(shr.s_access);
9563 9581
9564 9582 if (deny & OPEN4_SHARE_DENY_READ)
9565 9583 shr.s_deny |= F_RDDNY;
9566 9584 if (deny & OPEN4_SHARE_DENY_WRITE)
9567 9585 shr.s_deny |= F_WRDNY;
9568 9586
9569 9587 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9570 9588 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9571 9589 shr_loco.sl_pid = shr.s_pid;
9572 9590 shr_loco.sl_id = shr.s_sysid;
9573 9591 shr.s_owner = (caddr_t)&shr_loco;
9574 9592 shr.s_own_len = sizeof (shr_loco);
9575 9593
9576 9594 cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9577 9595
9578 9596 err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9579 9597 if (err != 0) {
9580 9598 if (err == EAGAIN)
9581 9599 err = NFS4ERR_SHARE_DENIED;
9582 9600 else
9583 9601 err = puterrno4(err);
9584 9602 return (err);
9585 9603 }
9586 9604
9587 9605 sp->rs_share_access |= access;
9588 9606 sp->rs_share_deny |= deny;
9589 9607
9590 9608 return (0);
9591 9609 }
9592 9610
9593 9611 int
9594 9612 rfs4_unshare(rfs4_state_t *sp)
9595 9613 {
9596 9614 int err;
9597 9615 struct shrlock shr;
9598 9616 struct shr_locowner shr_loco;
9599 9617
9600 9618 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9601 9619
9602 9620 if (sp->rs_closed || sp->rs_share_access == 0)
9603 9621 return (0);
9604 9622
9605 9623 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9606 9624 ASSERT(sp->rs_finfo->rf_vp);
9607 9625
9608 9626 shr.s_access = shr.s_deny = 0;
9609 9627 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9610 9628 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9611 9629 shr_loco.sl_pid = shr.s_pid;
9612 9630 shr_loco.sl_id = shr.s_sysid;
9613 9631 shr.s_owner = (caddr_t)&shr_loco;
9614 9632 shr.s_own_len = sizeof (shr_loco);
9615 9633
9616 9634 err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9617 9635 NULL);
9618 9636 if (err != 0) {
9619 9637 err = puterrno4(err);
9620 9638 return (err);
9621 9639 }
9622 9640
9623 9641 sp->rs_share_access = 0;
9624 9642 sp->rs_share_deny = 0;
9625 9643
9626 9644 return (0);
9627 9645
9628 9646 }
9629 9647
9630 9648 static int
9631 9649 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9632 9650 {
9633 9651 struct clist *wcl;
9634 9652 count4 count = rok->data_len;
9635 9653 int wlist_len;
9636 9654
9637 9655 wcl = args->wlist;
9638 9656 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9639 9657 return (FALSE);
9640 9658 }
9641 9659 wcl = args->wlist;
9642 9660 rok->wlist_len = wlist_len;
9643 9661 rok->wlist = wcl;
9644 9662 return (TRUE);
9645 9663 }
9646 9664
/*
 * Tunable to disable server referrals: when non-zero, vn_is_nfs_reparse()
 * reports B_FALSE without examining the vnode.
 */
int rfs4_no_referrals = 0;
9649 9667
9650 9668 /*
9651 9669 * Find an NFS record in reparse point data.
9652 9670 * Returns 0 for success and <0 or an errno value on failure.
9653 9671 */
9654 9672 int
9655 9673 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9656 9674 {
9657 9675 int err;
9658 9676 char *stype, *val;
9659 9677 nvlist_t *nvl;
9660 9678 nvpair_t *curr;
9661 9679
9662 9680 if ((nvl = reparse_init()) == NULL)
9663 9681 return (-1);
9664 9682
9665 9683 if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9666 9684 reparse_free(nvl);
9667 9685 return (err);
9668 9686 }
9669 9687
9670 9688 curr = NULL;
9671 9689 while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9672 9690 if ((stype = nvpair_name(curr)) == NULL) {
9673 9691 reparse_free(nvl);
9674 9692 return (-2);
9675 9693 }
9676 9694 if (strncasecmp(stype, "NFS", 3) == 0)
9677 9695 break;
9678 9696 }
9679 9697
9680 9698 if ((curr == NULL) ||
9681 9699 (nvpair_value_string(curr, &val))) {
9682 9700 reparse_free(nvl);
9683 9701 return (-3);
9684 9702 }
9685 9703 *nvlp = nvl;
9686 9704 *svcp = stype;
9687 9705 *datap = val;
9688 9706 return (0);
9689 9707 }
9690 9708
9691 9709 int
9692 9710 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9693 9711 {
9694 9712 nvlist_t *nvl;
9695 9713 char *s, *d;
9696 9714
9697 9715 if (rfs4_no_referrals != 0)
9698 9716 return (B_FALSE);
9699 9717
9700 9718 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9701 9719 return (B_FALSE);
9702 9720
9703 9721 if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9704 9722 return (B_FALSE);
9705 9723
9706 9724 reparse_free(nvl);
9707 9725
9708 9726 return (B_TRUE);
9709 9727 }
9710 9728
9711 9729 /*
9712 9730 * There is a user-level copy of this routine in ref_subr.c.
9713 9731 * Changes should be kept in sync.
9714 9732 */
9715 9733 static int
9716 9734 nfs4_create_components(char *path, component4 *comp4)
9717 9735 {
9718 9736 int slen, plen, ncomp;
9719 9737 char *ori_path, *nxtc, buf[MAXNAMELEN];
9720 9738
9721 9739 if (path == NULL)
9722 9740 return (0);
9723 9741
9724 9742 plen = strlen(path) + 1; /* include the terminator */
9725 9743 ori_path = path;
9726 9744 ncomp = 0;
9727 9745
9728 9746 /* count number of components in the path */
9729 9747 for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9730 9748 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9731 9749 if ((slen = nxtc - path) == 0) {
9732 9750 path = nxtc + 1;
9733 9751 continue;
9734 9752 }
9735 9753
9736 9754 if (comp4 != NULL) {
9737 9755 bcopy(path, buf, slen);
9738 9756 buf[slen] = '\0';
9739 9757 (void) str_to_utf8(buf, &comp4[ncomp]);
9740 9758 }
9741 9759
9742 9760 ncomp++; /* 1 valid component */
9743 9761 path = nxtc + 1;
9744 9762 }
9745 9763 if (*nxtc == '\0' || *nxtc == '\n')
9746 9764 break;
9747 9765 }
9748 9766
9749 9767 return (ncomp);
9750 9768 }
9751 9769
9752 9770 /*
9753 9771 * There is a user-level copy of this routine in ref_subr.c.
9754 9772 * Changes should be kept in sync.
9755 9773 */
9756 9774 static int
9757 9775 make_pathname4(char *path, pathname4 *pathname)
9758 9776 {
9759 9777 int ncomp;
9760 9778 component4 *comp4;
9761 9779
9762 9780 if (pathname == NULL)
9763 9781 return (0);
9764 9782
9765 9783 if (path == NULL) {
9766 9784 pathname->pathname4_val = NULL;
9767 9785 pathname->pathname4_len = 0;
9768 9786 return (0);
9769 9787 }
9770 9788
9771 9789 /* count number of components to alloc buffer */
9772 9790 if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
9773 9791 pathname->pathname4_val = NULL;
9774 9792 pathname->pathname4_len = 0;
9775 9793 return (0);
9776 9794 }
9777 9795 comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
9778 9796
9779 9797 /* copy components into allocated buffer */
9780 9798 ncomp = nfs4_create_components(path, comp4);
9781 9799
9782 9800 pathname->pathname4_val = comp4;
9783 9801 pathname->pathname4_len = ncomp;
9784 9802
9785 9803 return (ncomp);
9786 9804 }
9787 9805
9788 9806 #define xdr_fs_locations4 xdr_fattr4_fs_locations
9789 9807
9790 9808 fs_locations4 *
9791 9809 fetch_referral(vnode_t *vp, cred_t *cr)
9792 9810 {
9793 9811 nvlist_t *nvl;
9794 9812 char *stype, *sdata;
9795 9813 fs_locations4 *result;
9796 9814 char buf[1024];
9797 9815 size_t bufsize;
9798 9816 XDR xdr;
9799 9817 int err;
9800 9818
9801 9819 /*
9802 9820 * Check attrs to ensure it's a reparse point
9803 9821 */
9804 9822 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9805 9823 return (NULL);
9806 9824
9807 9825 /*
9808 9826 * Look for an NFS record and get the type and data
9809 9827 */
9810 9828 if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
9811 9829 return (NULL);
9812 9830
9813 9831 /*
9814 9832 * With the type and data, upcall to get the referral
9815 9833 */
9816 9834 bufsize = sizeof (buf);
9817 9835 bzero(buf, sizeof (buf));
9818 9836 err = reparse_kderef((const char *)stype, (const char *)sdata,
9819 9837 buf, &bufsize);
9820 9838 reparse_free(nvl);
9821 9839
9822 9840 DTRACE_PROBE4(nfs4serv__func__referral__upcall,
9823 9841 char *, stype, char *, sdata, char *, buf, int, err);
9824 9842 if (err) {
9825 9843 cmn_err(CE_NOTE,
9826 9844 "reparsed daemon not running: unable to get referral (%d)",
9827 9845 err);
9828 9846 return (NULL);
9829 9847 }
9830 9848
9831 9849 /*
9832 9850 * We get an XDR'ed record back from the kderef call
9833 9851 */
9834 9852 xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
9835 9853 result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
9836 9854 err = xdr_fs_locations4(&xdr, result);
9837 9855 XDR_DESTROY(&xdr);
9838 9856 if (err != TRUE) {
9839 9857 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
9840 9858 int, err);
9841 9859 return (NULL);
9842 9860 }
9843 9861
9844 9862 /*
9845 9863 * Look at path to recover fs_root, ignoring the leading '/'
9846 9864 */
9847 9865 (void) make_pathname4(vp->v_path, &result->fs_root);
9848 9866
9849 9867 return (result);
9850 9868 }
9851 9869
9852 9870 char *
9853 9871 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
9854 9872 {
9855 9873 fs_locations4 *fsl;
9856 9874 fs_location4 *fs;
9857 9875 char *server, *path, *symbuf;
9858 9876 static char *prefix = "/net/";
9859 9877 int i, size, npaths;
9860 9878 uint_t len;
9861 9879
9862 9880 /* Get the referral */
9863 9881 if ((fsl = fetch_referral(vp, cr)) == NULL)
9864 9882 return (NULL);
9865 9883
9866 9884 /* Deal with only the first location and first server */
9867 9885 fs = &fsl->locations_val[0];
9868 9886 server = utf8_to_str(&fs->server_val[0], &len, NULL);
9869 9887 if (server == NULL) {
9870 9888 rfs4_free_fs_locations4(fsl);
9871 9889 kmem_free(fsl, sizeof (fs_locations4));
9872 9890 return (NULL);
9873 9891 }
9874 9892
9875 9893 /* Figure out size for "/net/" + host + /path/path/path + NULL */
9876 9894 size = strlen(prefix) + len;
9877 9895 for (i = 0; i < fs->rootpath.pathname4_len; i++)
9878 9896 size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
9879 9897
9880 9898 /* Allocate the symlink buffer and fill it */
9881 9899 symbuf = kmem_zalloc(size, KM_SLEEP);
9882 9900 (void) strcat(symbuf, prefix);
9883 9901 (void) strcat(symbuf, server);
9884 9902 kmem_free(server, len);
9885 9903
9886 9904 npaths = 0;
9887 9905 for (i = 0; i < fs->rootpath.pathname4_len; i++) {
9888 9906 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
9889 9907 if (path == NULL)
9890 9908 continue;
9891 9909 (void) strcat(symbuf, "/");
9892 9910 (void) strcat(symbuf, path);
9893 9911 npaths++;
9894 9912 kmem_free(path, len);
9895 9913 }
9896 9914
9897 9915 rfs4_free_fs_locations4(fsl);
9898 9916 kmem_free(fsl, sizeof (fs_locations4));
9899 9917
9900 9918 if (strsz != NULL)
9901 9919 *strsz = size;
9902 9920 return (symbuf);
9903 9921 }
9904 9922
9905 9923 /*
9906 9924 * Check to see if we have a downrev Solaris client, so that we
9907 9925 * can send it a symlink instead of a referral.
9908 9926 */
9909 9927 int
9910 9928 client_is_downrev(struct svc_req *req)
9911 9929 {
9912 9930 struct sockaddr *ca;
9913 9931 rfs4_clntip_t *ci;
9914 9932 bool_t create = FALSE;
9915 9933 int is_downrev;
9916 9934
9917 9935 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9918 9936 ASSERT(ca);
9919 9937 ci = rfs4_find_clntip(ca, &create);
9920 9938 if (ci == NULL)
9921 9939 return (0);
9922 9940 is_downrev = ci->ri_no_referrals;
9923 9941 rfs4_dbe_rele(ci->ri_dbe);
9924 9942 return (is_downrev);
9925 9943 }
9926 9944
9927 9945 /*
9928 9946 * Do the main work of handling HA-NFSv4 Resource Group failover on
9929 9947 * Sun Cluster.
9930 9948 * We need to detect whether any RG admin paths have been added or removed,
9931 9949 * and adjust resources accordingly.
9932 9950 * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
9933 9951 * order to scale, the list and array of paths need to be held in more
9934 9952 * suitable data structures.
9935 9953 */
9936 9954 static void
9937 9955 hanfsv4_failover(nfs4_srv_t *nsrv4)
9938 9956 {
9939 9957 int i, start_grace, numadded_paths = 0;
9940 9958 char **added_paths = NULL;
9941 9959 rfs4_dss_path_t *dss_path;
9942 9960
9943 9961 /*
9944 9962 * Note: currently, dss_pathlist cannot be NULL, since
9945 9963 * it will always include an entry for NFS4_DSS_VAR_DIR. If we
9946 9964 * make the latter dynamically specified too, the following will
9947 9965 * need to be adjusted.
9948 9966 */
9949 9967
9950 9968 /*
9951 9969 * First, look for removed paths: RGs that have been failed-over
9952 9970 * away from this node.
9953 9971 * Walk the "currently-serving" dss_pathlist and, for each
9954 9972 * path, check if it is on the "passed-in" rfs4_dss_newpaths array
9955 9973 * from nfsd. If not, that RG path has been removed.
9956 9974 *
9957 9975 * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
9958 9976 * any duplicates.
9959 9977 */
9960 9978 dss_path = nsrv4->dss_pathlist;
9961 9979 do {
9962 9980 int found = 0;
9963 9981 char *path = dss_path->path;
9964 9982
9965 9983 /* used only for non-HA so may not be removed */
9966 9984 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
9967 9985 dss_path = dss_path->next;
9968 9986 continue;
9969 9987 }
9970 9988
9971 9989 for (i = 0; i < rfs4_dss_numnewpaths; i++) {
9972 9990 int cmpret;
9973 9991 char *newpath = rfs4_dss_newpaths[i];
9974 9992
9975 9993 /*
9976 9994 * Since nfsd has sorted rfs4_dss_newpaths for us,
9977 9995 * once the return from strcmp is negative we know
9978 9996 * we've passed the point where "path" should be,
9979 9997 * and can stop searching: "path" has been removed.
9980 9998 */
9981 9999 cmpret = strcmp(path, newpath);
9982 10000 if (cmpret < 0)
9983 10001 break;
9984 10002 if (cmpret == 0) {
9985 10003 found = 1;
9986 10004 break;
9987 10005 }
9988 10006 }
9989 10007
9990 10008 if (found == 0) {
9991 10009 unsigned index = dss_path->index;
9992 10010 rfs4_servinst_t *sip = dss_path->sip;
9993 10011 rfs4_dss_path_t *path_next = dss_path->next;
9994 10012
9995 10013 /*
9996 10014 * This path has been removed.
9997 10015 * We must clear out the servinst reference to
9998 10016 * it, since it's now owned by another
9999 10017 * node: we should not attempt to touch it.
10000 10018 */
10001 10019 ASSERT(dss_path == sip->dss_paths[index]);
10002 10020 sip->dss_paths[index] = NULL;
10003 10021
10004 10022 /* remove from "currently-serving" list, and destroy */
10005 10023 remque(dss_path);
10006 10024 /* allow for NUL */
10007 10025 kmem_free(dss_path->path, strlen(dss_path->path) + 1);
10008 10026 kmem_free(dss_path, sizeof (rfs4_dss_path_t));
10009 10027
10010 10028 dss_path = path_next;
10011 10029 } else {
10012 10030 /* path was found; not removed */
10013 10031 dss_path = dss_path->next;
10014 10032 }
10015 10033 } while (dss_path != nsrv4->dss_pathlist);
10016 10034
10017 10035 /*
10018 10036 * Now, look for added paths: RGs that have been failed-over
10019 10037 * to this node.
10020 10038 * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
10021 10039 * for each path, check if it is on the "currently-serving"
10022 10040 * dss_pathlist. If not, that RG path has been added.
10023 10041 *
10024 10042 * Note: we don't do duplicate detection here; nfsd does that for us.
10025 10043 *
10026 10044 * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
10027 10045 * an upper bound for the size needed for added_paths[numadded_paths].
10028 10046 */
10029 10047
10030 10048 /* probably more space than we need, but guaranteed to be enough */
10031 10049 if (rfs4_dss_numnewpaths > 0) {
10032 10050 size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
10033 10051 added_paths = kmem_zalloc(sz, KM_SLEEP);
10034 10052 }
10035 10053
10036 10054 /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
10037 10055 for (i = 0; i < rfs4_dss_numnewpaths; i++) {
10038 10056 int found = 0;
10039 10057 char *newpath = rfs4_dss_newpaths[i];
10040 10058
10041 10059 dss_path = nsrv4->dss_pathlist;
10042 10060 do {
10043 10061 char *path = dss_path->path;
10044 10062
10045 10063 /* used only for non-HA */
10046 10064 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
10047 10065 dss_path = dss_path->next;
10048 10066 continue;
10049 10067 }
10050 10068
10051 10069 if (strncmp(path, newpath, strlen(path)) == 0) {
10052 10070 found = 1;
10053 10071 break;
10054 10072 }
10055 10073
10056 10074 dss_path = dss_path->next;
10057 10075 } while (dss_path != nsrv4->dss_pathlist);
10058 10076
10059 10077 if (found == 0) {
10060 10078 added_paths[numadded_paths] = newpath;
10061 10079 numadded_paths++;
10062 10080 }
10063 10081 }
10064 10082
10065 10083 /* did we find any added paths? */
10066 10084 if (numadded_paths > 0) {
10067 10085
10068 10086 /* create a new server instance, and start its grace period */
10069 10087 start_grace = 1;
10070 10088 /* CSTYLED */
10071 10089 rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths);
10072 10090
10073 10091 /* read in the stable storage state from these paths */
10074 10092 rfs4_dss_readstate(nsrv4, numadded_paths, added_paths);
10075 10093
10076 10094 /*
10077 10095 * Multiple failovers during a grace period will cause
10078 10096 * clients of the same resource group to be partitioned
10079 10097 * into different server instances, with different
10080 10098 * grace periods. Since clients of the same resource
10081 10099 * group must be subject to the same grace period,
10082 10100 * we need to reset all currently active grace periods.
10083 10101 */
10084 10102 rfs4_grace_reset_all(nsrv4);
10085 10103 }
10086 10104
10087 10105 if (rfs4_dss_numnewpaths > 0)
10088 10106 kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
10089 10107 }
|
↓ open down ↓ |
7368 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX