Print this page
Fix NFS design problems re. multiple zone keys
Make NFS server zone-specific data all have the same lifetime
Fix rfs4_clean_state_exi
Fix exi_cache_reclaim
Fix mistakes in zone keys work
More fixes re. exi_zoneid and exi_tree
(danmcd -> Keep some ASSERT()s around for readability.)
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/nfs/nfs4_srv.c
+++ new/usr/src/uts/common/fs/nfs/nfs4_srv.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25
26 26 /*
27 27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
28 28 * All Rights Reserved
29 29 */
30 30
31 31 /*
32 32 * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
33 33 * Copyright 2019 Nexenta Systems, Inc.
34 34 * Copyright 2019 Nexenta by DDN, Inc.
35 35 */
36 36
37 37 #include <sys/param.h>
38 38 #include <sys/types.h>
39 39 #include <sys/systm.h>
40 40 #include <sys/cred.h>
41 41 #include <sys/buf.h>
42 42 #include <sys/vfs.h>
43 43 #include <sys/vfs_opreg.h>
44 44 #include <sys/vnode.h>
45 45 #include <sys/uio.h>
46 46 #include <sys/errno.h>
47 47 #include <sys/sysmacros.h>
48 48 #include <sys/statvfs.h>
49 49 #include <sys/kmem.h>
50 50 #include <sys/dirent.h>
51 51 #include <sys/cmn_err.h>
52 52 #include <sys/debug.h>
53 53 #include <sys/systeminfo.h>
54 54 #include <sys/flock.h>
55 55 #include <sys/pathname.h>
56 56 #include <sys/nbmlock.h>
57 57 #include <sys/share.h>
58 58 #include <sys/atomic.h>
59 59 #include <sys/policy.h>
60 60 #include <sys/fem.h>
61 61 #include <sys/sdt.h>
62 62 #include <sys/ddi.h>
63 63 #include <sys/zone.h>
64 64
65 65 #include <fs/fs_reparse.h>
66 66
67 67 #include <rpc/types.h>
68 68 #include <rpc/auth.h>
69 69 #include <rpc/rpcsec_gss.h>
70 70 #include <rpc/svc.h>
71 71
72 72 #include <nfs/nfs.h>
73 73 #include <nfs/nfssys.h>
74 74 #include <nfs/export.h>
75 75 #include <nfs/nfs_cmd.h>
76 76 #include <nfs/lm.h>
77 77 #include <nfs/nfs4.h>
78 78 #include <nfs/nfs4_drc.h>
79 79
80 80 #include <sys/strsubr.h>
81 81 #include <sys/strsun.h>
82 82
83 83 #include <inet/common.h>
84 84 #include <inet/ip.h>
85 85 #include <inet/ip6.h>
86 86
87 87 #include <sys/tsol/label.h>
88 88 #include <sys/tsol/tndb.h>
89 89
90 90 #define RFS4_MAXLOCK_TRIES 4 /* Try to get the lock this many times */
91 91 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
92 92 #define RFS4_LOCK_DELAY 10 /* Milliseconds */
93 93 static clock_t rfs4_lock_delay = RFS4_LOCK_DELAY;
94 94 extern struct svc_ops rdma_svc_ops;
95 95 extern int nfs_loaned_buffers;
96 96 /* End of Tunables */
97 97
98 98 static int rdma_setup_read_data4(READ4args *, READ4res *);
99 99
100 100 /*
101 101 * Used to bump the stateid4.seqid value and show changes in the stateid
102 102 */
103 103 #define next_stateid(sp) (++(sp)->bits.chgseq)
104 104
105 105 /*
106 106 * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
107 107 * This is used to return NFS4ERR_TOOSMALL when clients specify
108 108 * maxcount that isn't large enough to hold the smallest possible
109 109 * XDR encoded dirent.
110 110 *
111 111 * sizeof cookie (8 bytes) +
112 112 * sizeof name_len (4 bytes) +
113 113 * sizeof smallest (padded) name (4 bytes) +
114 114 * sizeof bitmap4_len (12 bytes) + NOTE: we always encode len=2 bm4
115 115 * sizeof attrlist4_len (4 bytes) +
116 116 * sizeof next boolean (4 bytes)
117 117 *
118 118 * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
119 119 * the smallest possible entry4 (assumes no attrs requested).
120 120 * sizeof nfsstat4 (4 bytes) +
121 121 * sizeof verifier4 (8 bytes) +
122 122 * sizeof entry4list bool (4 bytes) +
123 123 * sizeof entry4 (36 bytes) +
124 124 * sizeof eof bool (4 bytes)
125 125 *
126 126 * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
127 127 * VOP_READDIR. Its value is the size of the maximum possible dirent
128 128 * for solaris. The DIRENT64_RECLEN macro returns the size of dirent
129 129 * required for a given name length. MAXNAMELEN is the maximum
130 130 * filename length allowed in Solaris. The first two DIRENT64_RECLEN()
131 131 * macros are to allow for . and .. entries -- just a minor tweak to try
132 132 * and guarantee that buffer we give to VOP_READDIR will be large enough
133 133 * to hold ., .., and the largest possible solaris dirent64.
134 134 */
135 135 #define RFS4_MINLEN_ENTRY4 36
136 136 #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
137 137 #define RFS4_MINLEN_RDDIR_BUF \
138 138 (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
139 139
140 140 /*
141 141 * It would be better to pad to 4 bytes since that's what XDR would do,
142 142 * but the dirents UFS gives us are already padded to 8, so just take
143 143 * what we're given. Dircount is only a hint anyway. Currently the
144 144 * solaris kernel is ASCII only, so there's no point in calling the
145 145 * UTF8 functions.
|
↓ open down ↓ |
145 lines elided |
↑ open up ↑ |
146 146 *
147 147 * dirent64: name padded to provide 8 byte struct alignment
148 148 * d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
149 149 *
150 150 * cookie: uint64_t + utf8namelen: uint_t + utf8name padded to 8 bytes
151 151 *
152 152 */
153 153 #define DIRENT64_TO_DIRCOUNT(dp) \
154 154 (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
155 155
156 -zone_key_t rfs4_zone_key;
157 156
158 157 static sysid_t lockt_sysid; /* dummy sysid for all LOCKT calls */
159 158
160 159 u_longlong_t nfs4_srv_caller_id;
161 160 uint_t nfs4_srv_vkey = 0;
162 161
163 162 void rfs4_init_compound_state(struct compound_state *);
164 163
165 164 static void nullfree(caddr_t);
166 165 static void rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
167 166 struct compound_state *);
168 167 static void rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
169 168 struct compound_state *);
170 169 static void rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
171 170 struct compound_state *);
172 171 static void rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
173 172 struct compound_state *);
174 173 static void rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
175 174 struct compound_state *);
176 175 static void rfs4_op_create_free(nfs_resop4 *resop);
177 176 static void rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
178 177 struct svc_req *, struct compound_state *);
179 178 static void rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
180 179 struct svc_req *, struct compound_state *);
181 180 static void rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
182 181 struct compound_state *);
183 182 static void rfs4_op_getattr_free(nfs_resop4 *);
184 183 static void rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
185 184 struct compound_state *);
186 185 static void rfs4_op_getfh_free(nfs_resop4 *);
187 186 static void rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
188 187 struct compound_state *);
189 188 static void rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
190 189 struct compound_state *);
191 190 static void rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
192 191 struct compound_state *);
193 192 static void lock_denied_free(nfs_resop4 *);
194 193 static void rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
195 194 struct compound_state *);
196 195 static void rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
197 196 struct compound_state *);
198 197 static void rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
199 198 struct compound_state *);
200 199 static void rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
201 200 struct compound_state *);
202 201 static void rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
203 202 struct svc_req *req, struct compound_state *cs);
204 203 static void rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
205 204 struct compound_state *);
206 205 static void rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
207 206 struct compound_state *);
208 207 static void rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
209 208 struct svc_req *, struct compound_state *);
210 209 static void rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
211 210 struct svc_req *, struct compound_state *);
212 211 static void rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
213 212 struct compound_state *);
214 213 static void rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
215 214 struct compound_state *);
216 215 static void rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
217 216 struct compound_state *);
218 217 static void rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
219 218 struct compound_state *);
220 219 static void rfs4_op_read_free(nfs_resop4 *);
221 220 static void rfs4_op_readdir_free(nfs_resop4 *resop);
222 221 static void rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
223 222 struct compound_state *);
224 223 static void rfs4_op_readlink_free(nfs_resop4 *);
225 224 static void rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
226 225 struct svc_req *, struct compound_state *);
227 226 static void rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
228 227 struct compound_state *);
229 228 static void rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
230 229 struct compound_state *);
231 230 static void rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
232 231 struct compound_state *);
233 232 static void rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
234 233 struct compound_state *);
235 234 static void rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
236 235 struct compound_state *);
237 236 static void rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
238 237 struct compound_state *);
239 238 static void rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
240 239 struct compound_state *);
241 240 static void rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
242 241 struct compound_state *);
243 242 static void rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
244 243 struct svc_req *, struct compound_state *);
245 244 static void rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
246 245 struct svc_req *req, struct compound_state *);
247 246 static void rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
248 247 struct compound_state *);
249 248 static void rfs4_op_secinfo_free(nfs_resop4 *);
250 249
251 250 static nfsstat4 check_open_access(uint32_t, struct compound_state *,
252 251 struct svc_req *);
253 252 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
254 253 void rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *);
255 254
256 255
257 256 /*
258 257 * translation table for attrs
259 258 */
260 259 struct nfs4_ntov_table {
261 260 union nfs4_attr_u *na;
262 261 uint8_t amap[NFS4_MAXNUM_ATTRS];
263 262 int attrcnt;
264 263 bool_t vfsstat;
265 264 };
266 265
267 266 static void nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
268 267 static void nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
269 268 struct nfs4_svgetit_arg *sargp);
270 269
271 270 static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
272 271 struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
273 272 struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
274 273
275 274 static void hanfsv4_failover(nfs4_srv_t *);
276 275
277 276 fem_t *deleg_rdops;
278 277 fem_t *deleg_wrops;
279 278
280 279 /*
281 280 * NFS4 op dispatch table
282 281 */
283 282
284 283 struct rfsv4disp {
285 284 void (*dis_proc)(); /* proc to call */
286 285 void (*dis_resfree)(); /* frees space allocated by proc */
287 286 int dis_flags; /* RPC_IDEMPOTENT, etc... */
288 287 };
289 288
290 289 static struct rfsv4disp rfsv4disptab[] = {
291 290 /*
292 291 * NFS VERSION 4
293 292 */
294 293
295 294 /* RFS_NULL = 0 */
296 295 {rfs4_op_illegal, nullfree, 0},
297 296
298 297 /* UNUSED = 1 */
299 298 {rfs4_op_illegal, nullfree, 0},
300 299
301 300 /* UNUSED = 2 */
302 301 {rfs4_op_illegal, nullfree, 0},
303 302
304 303 /* OP_ACCESS = 3 */
305 304 {rfs4_op_access, nullfree, RPC_IDEMPOTENT},
306 305
307 306 /* OP_CLOSE = 4 */
308 307 {rfs4_op_close, nullfree, 0},
309 308
310 309 /* OP_COMMIT = 5 */
311 310 {rfs4_op_commit, nullfree, RPC_IDEMPOTENT},
312 311
313 312 /* OP_CREATE = 6 */
314 313 {rfs4_op_create, nullfree, 0},
315 314
316 315 /* OP_DELEGPURGE = 7 */
317 316 {rfs4_op_delegpurge, nullfree, 0},
318 317
319 318 /* OP_DELEGRETURN = 8 */
320 319 {rfs4_op_delegreturn, nullfree, 0},
321 320
322 321 /* OP_GETATTR = 9 */
323 322 {rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},
324 323
325 324 /* OP_GETFH = 10 */
326 325 {rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},
327 326
328 327 /* OP_LINK = 11 */
329 328 {rfs4_op_link, nullfree, 0},
330 329
331 330 /* OP_LOCK = 12 */
332 331 {rfs4_op_lock, lock_denied_free, 0},
333 332
334 333 /* OP_LOCKT = 13 */
335 334 {rfs4_op_lockt, lock_denied_free, 0},
336 335
337 336 /* OP_LOCKU = 14 */
338 337 {rfs4_op_locku, nullfree, 0},
339 338
340 339 /* OP_LOOKUP = 15 */
341 340 {rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
342 341
343 342 /* OP_LOOKUPP = 16 */
344 343 {rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
345 344
346 345 /* OP_NVERIFY = 17 */
347 346 {rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},
348 347
349 348 /* OP_OPEN = 18 */
350 349 {rfs4_op_open, rfs4_free_reply, 0},
351 350
352 351 /* OP_OPENATTR = 19 */
353 352 {rfs4_op_openattr, nullfree, 0},
354 353
355 354 /* OP_OPEN_CONFIRM = 20 */
356 355 {rfs4_op_open_confirm, nullfree, 0},
357 356
358 357 /* OP_OPEN_DOWNGRADE = 21 */
359 358 {rfs4_op_open_downgrade, nullfree, 0},
360 359
361 360 /* OP_PUTFH = 22 */
362 361 {rfs4_op_putfh, nullfree, RPC_ALL},
363 362
364 363 /* OP_PUTPUBFH = 23 */
365 364 {rfs4_op_putpubfh, nullfree, RPC_ALL},
366 365
367 366 /* OP_PUTROOTFH = 24 */
368 367 {rfs4_op_putrootfh, nullfree, RPC_ALL},
369 368
370 369 /* OP_READ = 25 */
371 370 {rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},
372 371
373 372 /* OP_READDIR = 26 */
374 373 {rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},
375 374
376 375 /* OP_READLINK = 27 */
377 376 {rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},
378 377
379 378 /* OP_REMOVE = 28 */
380 379 {rfs4_op_remove, nullfree, 0},
381 380
382 381 /* OP_RENAME = 29 */
383 382 {rfs4_op_rename, nullfree, 0},
384 383
385 384 /* OP_RENEW = 30 */
386 385 {rfs4_op_renew, nullfree, 0},
387 386
388 387 /* OP_RESTOREFH = 31 */
389 388 {rfs4_op_restorefh, nullfree, RPC_ALL},
390 389
391 390 /* OP_SAVEFH = 32 */
392 391 {rfs4_op_savefh, nullfree, RPC_ALL},
393 392
394 393 /* OP_SECINFO = 33 */
395 394 {rfs4_op_secinfo, rfs4_op_secinfo_free, 0},
396 395
397 396 /* OP_SETATTR = 34 */
398 397 {rfs4_op_setattr, nullfree, 0},
399 398
400 399 /* OP_SETCLIENTID = 35 */
401 400 {rfs4_op_setclientid, nullfree, 0},
402 401
403 402 /* OP_SETCLIENTID_CONFIRM = 36 */
404 403 {rfs4_op_setclientid_confirm, nullfree, 0},
405 404
406 405 /* OP_VERIFY = 37 */
407 406 {rfs4_op_verify, nullfree, RPC_IDEMPOTENT},
408 407
409 408 /* OP_WRITE = 38 */
410 409 {rfs4_op_write, nullfree, 0},
411 410
412 411 /* OP_RELEASE_LOCKOWNER = 39 */
413 412 {rfs4_op_release_lockowner, nullfree, 0},
414 413 };
415 414
416 415 static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
417 416
418 417 #define OP_ILLEGAL_IDX (rfsv4disp_cnt)
419 418
420 419 #ifdef DEBUG
421 420
422 421 int rfs4_fillone_debug = 0;
423 422 int rfs4_no_stub_access = 1;
424 423 int rfs4_rddir_debug = 0;
425 424
426 425 static char *rfs4_op_string[] = {
427 426 "rfs4_op_null",
428 427 "rfs4_op_1 unused",
429 428 "rfs4_op_2 unused",
430 429 "rfs4_op_access",
431 430 "rfs4_op_close",
432 431 "rfs4_op_commit",
433 432 "rfs4_op_create",
434 433 "rfs4_op_delegpurge",
435 434 "rfs4_op_delegreturn",
436 435 "rfs4_op_getattr",
437 436 "rfs4_op_getfh",
438 437 "rfs4_op_link",
439 438 "rfs4_op_lock",
440 439 "rfs4_op_lockt",
441 440 "rfs4_op_locku",
442 441 "rfs4_op_lookup",
443 442 "rfs4_op_lookupp",
444 443 "rfs4_op_nverify",
445 444 "rfs4_op_open",
446 445 "rfs4_op_openattr",
447 446 "rfs4_op_open_confirm",
448 447 "rfs4_op_open_downgrade",
449 448 "rfs4_op_putfh",
450 449 "rfs4_op_putpubfh",
451 450 "rfs4_op_putrootfh",
452 451 "rfs4_op_read",
453 452 "rfs4_op_readdir",
454 453 "rfs4_op_readlink",
455 454 "rfs4_op_remove",
456 455 "rfs4_op_rename",
457 456 "rfs4_op_renew",
458 457 "rfs4_op_restorefh",
459 458 "rfs4_op_savefh",
460 459 "rfs4_op_secinfo",
461 460 "rfs4_op_setattr",
462 461 "rfs4_op_setclientid",
463 462 "rfs4_op_setclient_confirm",
464 463 "rfs4_op_verify",
465 464 "rfs4_op_write",
466 465 "rfs4_op_release_lockowner",
467 466 "rfs4_op_illegal"
468 467 };
469 468 #endif
470 469
471 470 void rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *);
472 471
473 472 extern size_t strlcpy(char *dst, const char *src, size_t dstsize);
474 473
475 474 extern void rfs4_free_fs_locations4(fs_locations4 *);
476 475
477 476 #ifdef nextdp
478 477 #undef nextdp
479 478 #endif
480 479 #define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
481 480
482 481 static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
483 482 VOPNAME_OPEN, { .femop_open = deleg_rd_open },
484 483 VOPNAME_WRITE, { .femop_write = deleg_rd_write },
485 484 VOPNAME_SETATTR, { .femop_setattr = deleg_rd_setattr },
486 485 VOPNAME_RWLOCK, { .femop_rwlock = deleg_rd_rwlock },
487 486 VOPNAME_SPACE, { .femop_space = deleg_rd_space },
488 487 VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_rd_setsecattr },
489 488 VOPNAME_VNEVENT, { .femop_vnevent = deleg_rd_vnevent },
490 489 NULL, NULL
491 490 };
492 491 static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
493 492 VOPNAME_OPEN, { .femop_open = deleg_wr_open },
|
↓ open down ↓ |
327 lines elided |
↑ open up ↑ |
494 493 VOPNAME_READ, { .femop_read = deleg_wr_read },
495 494 VOPNAME_WRITE, { .femop_write = deleg_wr_write },
496 495 VOPNAME_SETATTR, { .femop_setattr = deleg_wr_setattr },
497 496 VOPNAME_RWLOCK, { .femop_rwlock = deleg_wr_rwlock },
498 497 VOPNAME_SPACE, { .femop_space = deleg_wr_space },
499 498 VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_wr_setsecattr },
500 499 VOPNAME_VNEVENT, { .femop_vnevent = deleg_wr_vnevent },
501 500 NULL, NULL
502 501 };
503 502
504 -/* ARGSUSED */
505 -static void *
506 -rfs4_zone_init(zoneid_t zoneid)
503 +nfs4_srv_t *
504 +nfs4_get_srv(void)
507 505 {
506 + nfs_globals_t *ng = zone_getspecific(nfssrv_zone_key, curzone);
507 + nfs4_srv_t *srv = ng->nfs4_srv;
508 + ASSERT(srv != NULL);
509 + return (srv);
510 +}
511 +
512 +void
513 +rfs4_srv_zone_init(nfs_globals_t *ng)
514 +{
508 515 nfs4_srv_t *nsrv4;
509 516 timespec32_t verf;
510 517
511 518 nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP);
512 519
513 520 /*
514 521 * The following algorithm attempts to find a unique verifier
515 522 * to be used as the write verifier returned from the server
516 523 * to the client. It is important that this verifier change
517 524 * whenever the server reboots. Of secondary importance, it
518 525 * is important for the verifier to be unique between two
519 526 * different servers.
520 527 *
521 528 * Thus, an attempt is made to use the system hostid and the
522 529 * current time in seconds when the nfssrv kernel module is
523 530 * loaded. It is assumed that an NFS server will not be able
524 531 * to boot and then to reboot in less than a second. If the
525 532 * hostid has not been set, then the current high resolution
526 533 * time is used. This will ensure different verifiers each
527 534 * time the server reboots and minimize the chances that two
528 535 * different servers will have the same verifier.
529 536 * XXX - this is broken on LP64 kernels.
530 537 */
531 538 verf.tv_sec = (time_t)zone_get_hostid(NULL);
532 539 if (verf.tv_sec != 0) {
533 540 verf.tv_nsec = gethrestime_sec();
534 541 } else {
535 542 timespec_t tverf;
536 543
537 544 gethrestime(&tverf);
538 545 verf.tv_sec = (time_t)tverf.tv_sec;
539 546 verf.tv_nsec = tverf.tv_nsec;
540 547 }
541 548 nsrv4->write4verf = *(uint64_t *)&verf;
|
↓ open down ↓ |
24 lines elided |
↑ open up ↑ |
542 549
543 550 /* Used to manage create/destroy of server state */
544 551 nsrv4->nfs4_server_state = NULL;
545 552 nsrv4->nfs4_cur_servinst = NULL;
546 553 nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE;
547 554 mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL);
548 555 mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL);
549 556 mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL);
550 557 rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL);
551 558
552 - return (nsrv4);
559 + ng->nfs4_srv = nsrv4;
553 560 }
554 561
555 -/* ARGSUSED */
556 -static void
557 -rfs4_zone_fini(zoneid_t zoneid, void *data)
562 +void
563 +rfs4_srv_zone_fini(nfs_globals_t *ng)
558 564 {
559 - nfs4_srv_t *nsrv4 = data;
565 + nfs4_srv_t *nsrv4 = ng->nfs4_srv;
560 566
567 + ng->nfs4_srv = NULL;
568 +
561 569 mutex_destroy(&nsrv4->deleg_lock);
562 570 mutex_destroy(&nsrv4->state_lock);
563 571 mutex_destroy(&nsrv4->servinst_lock);
564 572 rw_destroy(&nsrv4->deleg_policy_lock);
565 573
566 574 kmem_free(nsrv4, sizeof (*nsrv4));
567 575 }
568 576
569 577 void
570 578 rfs4_srvrinit(void)
571 579 {
572 580 extern void rfs4_attr_init();
573 581
574 - zone_key_create(&rfs4_zone_key, rfs4_zone_init, NULL, rfs4_zone_fini);
575 -
576 582 rfs4_attr_init();
577 583
578 -
579 584 if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) {
580 585 rfs4_disable_delegation();
581 586 } else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
582 587 &deleg_wrops) != 0) {
583 588 rfs4_disable_delegation();
584 589 fem_free(deleg_rdops);
585 590 }
586 591
587 592 nfs4_srv_caller_id = fs_new_caller_id();
588 593 lockt_sysid = lm_alloc_sysidt();
589 594 vsd_create(&nfs4_srv_vkey, NULL);
590 595 rfs4_state_g_init();
591 596 }
592 597
593 598 void
594 599 rfs4_srvrfini(void)
|
↓ open down ↓ |
6 lines elided |
↑ open up ↑ |
595 600 {
596 601 if (lockt_sysid != LM_NOSYSID) {
597 602 lm_free_sysidt(lockt_sysid);
598 603 lockt_sysid = LM_NOSYSID;
599 604 }
600 605
601 606 rfs4_state_g_fini();
602 607
603 608 fem_free(deleg_rdops);
604 609 fem_free(deleg_wrops);
605 -
606 - (void) zone_key_delete(rfs4_zone_key);
607 610 }
608 611
609 612 void
610 613 rfs4_do_server_start(int server_upordown,
611 614 int srv_delegation, int cluster_booted)
612 615 {
613 - nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
616 + nfs4_srv_t *nsrv4 = nfs4_get_srv();
614 617
615 618 /* Is this a warm start? */
616 619 if (server_upordown == NFS_SERVER_QUIESCED) {
617 620 cmn_err(CE_NOTE, "nfs4_srv: "
618 621 "server was previously quiesced; "
619 622 "existing NFSv4 state will be re-used");
620 623
621 624 /*
622 625 * HA-NFSv4: this is also the signal
623 626 * that a Resource Group failover has
624 627 * occurred.
625 628 */
626 629 if (cluster_booted)
627 630 hanfsv4_failover(nsrv4);
628 631 } else {
629 632 /* Cold start */
630 633 nsrv4->rfs4_start_time = 0;
631 634 rfs4_state_zone_init(nsrv4);
632 635 nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max,
633 636 nfs4_drc_hash);
634 637
635 638 /*
636 639 * The nfsd service was started with the -s option
637 640 * we need to pull in any state from the paths indicated.
638 641 */
639 642 if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
640 643 /* read in the stable storage state from these paths */
641 644 rfs4_dss_readstate(nsrv4, rfs4_dss_numnewpaths,
642 645 rfs4_dss_newpaths);
643 646 }
644 647 }
645 648
646 649 /* Check if delegation is to be enabled */
647 650 if (srv_delegation != FALSE)
648 651 rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE);
649 652 }
650 653
651 654 void
652 655 rfs4_init_compound_state(struct compound_state *cs)
653 656 {
654 657 bzero(cs, sizeof (*cs));
655 658 cs->cont = TRUE;
656 659 cs->access = CS_ACCESS_DENIED;
657 660 cs->deleg = FALSE;
658 661 cs->mandlock = FALSE;
659 662 cs->fh.nfs_fh4_val = cs->fhbuf;
660 663 }
661 664
662 665 void
663 666 rfs4_grace_start(rfs4_servinst_t *sip)
664 667 {
665 668 rw_enter(&sip->rwlock, RW_WRITER);
666 669 sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
667 670 sip->grace_period = rfs4_grace_period;
668 671 rw_exit(&sip->rwlock);
669 672 }
670 673
671 674 /*
672 675 * returns true if the instance's grace period has never been started
673 676 */
674 677 int
675 678 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
676 679 {
677 680 time_t start_time;
678 681
679 682 rw_enter(&sip->rwlock, RW_READER);
680 683 start_time = sip->start_time;
681 684 rw_exit(&sip->rwlock);
682 685
683 686 return (start_time == 0);
684 687 }
685 688
686 689 /*
687 690 * Indicates if server instance is within the
688 691 * grace period.
689 692 */
690 693 int
691 694 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
692 695 {
693 696 time_t grace_expiry;
694 697
695 698 rw_enter(&sip->rwlock, RW_READER);
696 699 grace_expiry = sip->start_time + sip->grace_period;
697 700 rw_exit(&sip->rwlock);
698 701
699 702 return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
700 703 }
701 704
702 705 int
703 706 rfs4_clnt_in_grace(rfs4_client_t *cp)
704 707 {
705 708 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
706 709
707 710 return (rfs4_servinst_in_grace(cp->rc_server_instance));
708 711 }
709 712
710 713 /*
711 714 * reset all currently active grace periods
712 715 */
713 716 void
714 717 rfs4_grace_reset_all(nfs4_srv_t *nsrv4)
715 718 {
716 719 rfs4_servinst_t *sip;
717 720
718 721 mutex_enter(&nsrv4->servinst_lock);
719 722 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
720 723 if (rfs4_servinst_in_grace(sip))
721 724 rfs4_grace_start(sip);
722 725 mutex_exit(&nsrv4->servinst_lock);
723 726 }
724 727
725 728 /*
726 729 * start any new instances' grace periods
727 730 */
728 731 void
729 732 rfs4_grace_start_new(nfs4_srv_t *nsrv4)
730 733 {
731 734 rfs4_servinst_t *sip;
732 735
733 736 mutex_enter(&nsrv4->servinst_lock);
734 737 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
735 738 if (rfs4_servinst_grace_new(sip))
736 739 rfs4_grace_start(sip);
737 740 mutex_exit(&nsrv4->servinst_lock);
738 741 }
739 742
740 743 static rfs4_dss_path_t *
741 744 rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip,
742 745 char *path, unsigned index)
743 746 {
744 747 size_t len;
745 748 rfs4_dss_path_t *dss_path;
746 749
747 750 dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
748 751
749 752 /*
750 753 * Take a copy of the string, since the original may be overwritten.
751 754 * Sadly, no strdup() in the kernel.
752 755 */
753 756 /* allow for NUL */
754 757 len = strlen(path) + 1;
755 758 dss_path->path = kmem_alloc(len, KM_SLEEP);
756 759 (void) strlcpy(dss_path->path, path, len);
757 760
758 761 /* associate with servinst */
759 762 dss_path->sip = sip;
760 763 dss_path->index = index;
761 764
762 765 /*
763 766 * Add to list of served paths.
764 767 * No locking required, as we're only ever called at startup.
765 768 */
766 769 if (nsrv4->dss_pathlist == NULL) {
767 770 /* this is the first dss_path_t */
768 771
769 772 /* needed for insque/remque */
770 773 dss_path->next = dss_path->prev = dss_path;
771 774
772 775 nsrv4->dss_pathlist = dss_path;
773 776 } else {
774 777 insque(dss_path, nsrv4->dss_pathlist);
775 778 }
776 779
777 780 return (dss_path);
778 781 }
779 782
780 783 /*
781 784 * Create a new server instance, and make it the currently active instance.
782 785 * Note that starting the grace period too early will reduce the clients'
783 786 * recovery window.
784 787 */
785 788 void
786 789 rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace,
787 790 int dss_npaths, char **dss_paths)
788 791 {
789 792 unsigned i;
790 793 rfs4_servinst_t *sip;
791 794 rfs4_oldstate_t *oldstate;
792 795
793 796 sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
794 797 rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
795 798
796 799 sip->start_time = (time_t)0;
797 800 sip->grace_period = (time_t)0;
798 801 sip->next = NULL;
799 802 sip->prev = NULL;
800 803
801 804 rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
802 805 /*
803 806 * This initial dummy entry is required to setup for insque/remque.
804 807 * It must be skipped over whenever the list is traversed.
805 808 */
806 809 oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
807 810 /* insque/remque require initial list entry to be self-terminated */
808 811 oldstate->next = oldstate;
809 812 oldstate->prev = oldstate;
810 813 sip->oldstate = oldstate;
811 814
812 815
813 816 sip->dss_npaths = dss_npaths;
814 817 sip->dss_paths = kmem_alloc(dss_npaths *
815 818 sizeof (rfs4_dss_path_t *), KM_SLEEP);
816 819
817 820 for (i = 0; i < dss_npaths; i++) {
818 821 sip->dss_paths[i] =
819 822 rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
820 823 }
821 824
822 825 mutex_enter(&nsrv4->servinst_lock);
823 826 if (nsrv4->nfs4_cur_servinst != NULL) {
824 827 /* add to linked list */
825 828 sip->prev = nsrv4->nfs4_cur_servinst;
826 829 nsrv4->nfs4_cur_servinst->next = sip;
827 830 }
828 831 if (start_grace)
829 832 rfs4_grace_start(sip);
830 833 /* make the new instance "current" */
831 834 nsrv4->nfs4_cur_servinst = sip;
832 835
833 836 mutex_exit(&nsrv4->servinst_lock);
834 837 }
835 838
836 839 /*
837 840 * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
838 841 * all instances directly.
839 842 */
840 843 void
841 844 rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4)
842 845 {
843 846 rfs4_servinst_t *sip, *prev, *current;
844 847 #ifdef DEBUG
845 848 int n = 0;
846 849 #endif
847 850
848 851 mutex_enter(&nsrv4->servinst_lock);
849 852 ASSERT(nsrv4->nfs4_cur_servinst != NULL);
850 853 current = nsrv4->nfs4_cur_servinst;
851 854 nsrv4->nfs4_cur_servinst = NULL;
852 855 for (sip = current; sip != NULL; sip = prev) {
853 856 prev = sip->prev;
854 857 rw_destroy(&sip->rwlock);
855 858 if (sip->oldstate)
856 859 kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
857 860 if (sip->dss_paths) {
858 861 int i = sip->dss_npaths;
859 862
860 863 while (i > 0) {
861 864 i--;
862 865 if (sip->dss_paths[i] != NULL) {
863 866 char *path = sip->dss_paths[i]->path;
864 867
865 868 if (path != NULL) {
866 869 kmem_free(path,
867 870 strlen(path) + 1);
868 871 }
869 872 kmem_free(sip->dss_paths[i],
870 873 sizeof (rfs4_dss_path_t));
871 874 }
872 875 }
873 876 kmem_free(sip->dss_paths,
874 877 sip->dss_npaths * sizeof (rfs4_dss_path_t *));
875 878 }
876 879 kmem_free(sip, sizeof (rfs4_servinst_t));
877 880 #ifdef DEBUG
878 881 n++;
879 882 #endif
880 883 }
881 884 mutex_exit(&nsrv4->servinst_lock);
882 885 }
883 886
884 887 /*
885 888 * Assign the current server instance to a client_t.
886 889 * Should be called with cp->rc_dbe held.
887 890 */
888 891 void
889 892 rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp,
890 893 rfs4_servinst_t *sip)
891 894 {
892 895 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
893 896
894 897 /*
895 898 * The lock ensures that if the current instance is in the process
896 899 * of changing, we will see the new one.
897 900 */
898 901 mutex_enter(&nsrv4->servinst_lock);
899 902 cp->rc_server_instance = sip;
900 903 mutex_exit(&nsrv4->servinst_lock);
901 904 }
902 905
903 906 rfs4_servinst_t *
904 907 rfs4_servinst(rfs4_client_t *cp)
905 908 {
906 909 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
907 910
908 911 return (cp->rc_server_instance);
909 912 }
910 913
/*
 * No-op free routine: takes a result pointer and releases nothing.
 */
/* ARGSUSED */
static void
nullfree(caddr_t resop)
{
}
916 919
917 920 /*
918 921 * This is a fall-through for invalid or not implemented (yet) ops
919 922 */
920 923 /* ARGSUSED */
921 924 static void
922 925 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
923 926 struct compound_state *cs)
924 927 {
925 928 *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
926 929 }
927 930
928 931 /*
929 932 * Check if the security flavor, nfsnum, is in the flavor_list.
930 933 */
931 934 bool_t
932 935 in_flavor_list(int nfsnum, int *flavor_list, int count)
933 936 {
934 937 int i;
935 938
936 939 for (i = 0; i < count; i++) {
937 940 if (nfsnum == flavor_list[i])
938 941 return (TRUE);
939 942 }
940 943 return (FALSE);
941 944 }
942 945
943 946 /*
944 947 * Used by rfs4_op_secinfo to get the security information from the
945 948 * export structure associated with the component.
946 949 */
947 950 /* ARGSUSED */
948 951 static nfsstat4
949 952 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
950 953 {
951 954 int error, different_export = 0;
952 955 vnode_t *dvp, *vp;
953 956 struct exportinfo *exi = NULL;
954 957 fid_t fid;
955 958 uint_t count, i;
956 959 secinfo4 *resok_val;
957 960 struct secinfo *secp;
958 961 seconfig_t *si;
959 962 bool_t did_traverse = FALSE;
960 963 int dotdot, walk;
961 964 nfs_export_t *ne = nfs_get_export();
962 965
963 966 dvp = cs->vp;
964 967 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
965 968
966 969 /*
967 970 * If dotdotting, then need to check whether it's above the
968 971 * root of a filesystem, or above an export point.
969 972 */
970 973 if (dotdot) {
971 974
972 975 /*
973 976 * If dotdotting at the root of a filesystem, then
974 977 * need to traverse back to the mounted-on filesystem
975 978 * and do the dotdot lookup there.
976 979 */
977 980 if ((cs->vp->v_flag & VROOT) || VN_IS_CURZONEROOT(cs->vp)) {
978 981
979 982 /*
980 983 * If at the system root, then can
981 984 * go up no further.
982 985 */
983 986 if (VN_CMP(dvp, ZONE_ROOTVP()))
984 987 return (puterrno4(ENOENT));
985 988
986 989 /*
987 990 * Traverse back to the mounted-on filesystem
988 991 */
989 992 dvp = untraverse(cs->vp);
990 993
991 994 /*
992 995 * Set the different_export flag so we remember
993 996 * to pick up a new exportinfo entry for
994 997 * this new filesystem.
995 998 */
996 999 different_export = 1;
997 1000 } else {
998 1001
999 1002 /*
1000 1003 * If dotdotting above an export point then set
1001 1004 * the different_export to get new export info.
1002 1005 */
1003 1006 different_export = nfs_exported(cs->exi, cs->vp);
1004 1007 }
1005 1008 }
1006 1009
1007 1010 /*
1008 1011 * Get the vnode for the component "nm".
1009 1012 */
1010 1013 error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
1011 1014 NULL, NULL, NULL);
1012 1015 if (error)
1013 1016 return (puterrno4(error));
1014 1017
1015 1018 /*
1016 1019 * If the vnode is in a pseudo filesystem, or if the security flavor
1017 1020 * used in the request is valid but not an explicitly shared flavor,
1018 1021 * or the access bit indicates that this is a limited access,
1019 1022 * check whether this vnode is visible.
1020 1023 */
1021 1024 if (!different_export &&
1022 1025 (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
1023 1026 cs->access & CS_ACCESS_LIMITED)) {
1024 1027 if (! nfs_visible(cs->exi, vp, &different_export)) {
1025 1028 VN_RELE(vp);
1026 1029 return (puterrno4(ENOENT));
1027 1030 }
1028 1031 }
1029 1032
1030 1033 /*
1031 1034 * If it's a mountpoint, then traverse it.
1032 1035 */
1033 1036 if (vn_ismntpt(vp)) {
1034 1037 if ((error = traverse(&vp)) != 0) {
1035 1038 VN_RELE(vp);
1036 1039 return (puterrno4(error));
1037 1040 }
1038 1041 /* remember that we had to traverse mountpoint */
1039 1042 did_traverse = TRUE;
1040 1043 different_export = 1;
1041 1044 } else if (vp->v_vfsp != dvp->v_vfsp) {
1042 1045 /*
1043 1046 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
1044 1047 * then vp is probably an LOFS object. We don't need the
1045 1048 * realvp, we just need to know that we might have crossed
1046 1049 * a server fs boundary and need to call checkexport4.
1047 1050 * (LOFS lookup hides server fs mountpoints, and actually calls
1048 1051 * traverse)
1049 1052 */
1050 1053 different_export = 1;
1051 1054 }
1052 1055
1053 1056 /*
1054 1057 * Get the export information for it.
1055 1058 */
1056 1059 if (different_export) {
1057 1060
1058 1061 bzero(&fid, sizeof (fid));
1059 1062 fid.fid_len = MAXFIDSZ;
1060 1063 error = vop_fid_pseudo(vp, &fid);
1061 1064 if (error) {
1062 1065 VN_RELE(vp);
1063 1066 return (puterrno4(error));
1064 1067 }
1065 1068
1066 1069 if (dotdot)
1067 1070 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
1068 1071 else
1069 1072 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
1070 1073
1071 1074 if (exi == NULL) {
1072 1075 if (did_traverse == TRUE) {
1073 1076 /*
1074 1077 * If this vnode is a mounted-on vnode,
1075 1078 * but the mounted-on file system is not
1076 1079 * exported, send back the secinfo for
1077 1080 * the exported node that the mounted-on
1078 1081 * vnode lives in.
1079 1082 */
1080 1083 exi = cs->exi;
1081 1084 } else {
1082 1085 VN_RELE(vp);
1083 1086 return (puterrno4(EACCES));
1084 1087 }
1085 1088 }
1086 1089 } else {
1087 1090 exi = cs->exi;
1088 1091 }
1089 1092 ASSERT(exi != NULL);
1090 1093
1091 1094
1092 1095 /*
1093 1096 * Create the secinfo result based on the security information
1094 1097 * from the exportinfo structure (exi).
1095 1098 *
1096 1099 * Return all flavors for a pseudo node.
1097 1100 * For a real export node, return the flavor that the client
1098 1101 * has access with.
1099 1102 */
1100 1103 ASSERT(RW_LOCK_HELD(&ne->exported_lock));
1101 1104 if (PSEUDO(exi)) {
1102 1105 count = exi->exi_export.ex_seccnt; /* total sec count */
1103 1106 resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1104 1107 secp = exi->exi_export.ex_secinfo;
1105 1108
1106 1109 for (i = 0; i < count; i++) {
1107 1110 si = &secp[i].s_secinfo;
1108 1111 resok_val[i].flavor = si->sc_rpcnum;
1109 1112 if (resok_val[i].flavor == RPCSEC_GSS) {
1110 1113 rpcsec_gss_info *info;
1111 1114
1112 1115 info = &resok_val[i].flavor_info;
1113 1116 info->qop = si->sc_qop;
1114 1117 info->service = (rpc_gss_svc_t)si->sc_service;
1115 1118
1116 1119 /* get oid opaque data */
1117 1120 info->oid.sec_oid4_len =
1118 1121 si->sc_gss_mech_type->length;
1119 1122 info->oid.sec_oid4_val = kmem_alloc(
1120 1123 si->sc_gss_mech_type->length, KM_SLEEP);
1121 1124 bcopy(
1122 1125 si->sc_gss_mech_type->elements,
1123 1126 info->oid.sec_oid4_val,
1124 1127 info->oid.sec_oid4_len);
1125 1128 }
1126 1129 }
1127 1130 resp->SECINFO4resok_len = count;
1128 1131 resp->SECINFO4resok_val = resok_val;
1129 1132 } else {
1130 1133 int ret_cnt = 0, k = 0;
1131 1134 int *flavor_list;
1132 1135
1133 1136 count = exi->exi_export.ex_seccnt; /* total sec count */
1134 1137 secp = exi->exi_export.ex_secinfo;
1135 1138
1136 1139 flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1137 1140 /* find out which flavors to return */
1138 1141 for (i = 0; i < count; i ++) {
1139 1142 int access, flavor, perm;
1140 1143
1141 1144 flavor = secp[i].s_secinfo.sc_nfsnum;
1142 1145 perm = secp[i].s_flags;
1143 1146
1144 1147 access = nfsauth4_secinfo_access(exi, cs->req,
1145 1148 flavor, perm, cs->basecr);
1146 1149
1147 1150 if (! (access & NFSAUTH_DENIED) &&
1148 1151 ! (access & NFSAUTH_WRONGSEC)) {
1149 1152 flavor_list[ret_cnt] = flavor;
1150 1153 ret_cnt++;
1151 1154 }
1152 1155 }
1153 1156
1154 1157 /* Create the returning SECINFO value */
1155 1158 resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1156 1159
1157 1160 for (i = 0; i < count; i++) {
1158 1161 /*
1159 1162 * If the flavor is in the flavor list,
1160 1163 * fill in resok_val.
1161 1164 */
1162 1165 si = &secp[i].s_secinfo;
1163 1166 if (in_flavor_list(si->sc_nfsnum,
1164 1167 flavor_list, ret_cnt)) {
1165 1168 resok_val[k].flavor = si->sc_rpcnum;
1166 1169 if (resok_val[k].flavor == RPCSEC_GSS) {
1167 1170 rpcsec_gss_info *info;
1168 1171
1169 1172 info = &resok_val[k].flavor_info;
1170 1173 info->qop = si->sc_qop;
1171 1174 info->service = (rpc_gss_svc_t)
1172 1175 si->sc_service;
1173 1176
1174 1177 /* get oid opaque data */
1175 1178 info->oid.sec_oid4_len =
1176 1179 si->sc_gss_mech_type->length;
1177 1180 info->oid.sec_oid4_val = kmem_alloc(
1178 1181 si->sc_gss_mech_type->length,
1179 1182 KM_SLEEP);
1180 1183 bcopy(si->sc_gss_mech_type->elements,
1181 1184 info->oid.sec_oid4_val,
1182 1185 info->oid.sec_oid4_len);
1183 1186 }
1184 1187 k++;
1185 1188 }
1186 1189 if (k >= ret_cnt)
1187 1190 break;
1188 1191 }
1189 1192 resp->SECINFO4resok_len = ret_cnt;
1190 1193 resp->SECINFO4resok_val = resok_val;
1191 1194 kmem_free(flavor_list, count * sizeof (int));
1192 1195 }
1193 1196
1194 1197 VN_RELE(vp);
1195 1198 return (NFS4_OK);
1196 1199 }
1197 1200
1198 1201 /*
1199 1202 * SECINFO (Operation 33): Obtain required security information on
1200 1203 * the component name in the format of (security-mechanism-oid, qop, service)
1201 1204 * triplets.
1202 1205 */
1203 1206 /* ARGSUSED */
1204 1207 static void
1205 1208 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1206 1209 struct compound_state *cs)
1207 1210 {
1208 1211 SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1209 1212 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1210 1213 utf8string *utfnm = &args->name;
1211 1214 uint_t len;
1212 1215 char *nm;
1213 1216 struct sockaddr *ca;
1214 1217 char *name = NULL;
1215 1218 nfsstat4 status = NFS4_OK;
1216 1219
1217 1220 DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1218 1221 SECINFO4args *, args);
1219 1222
1220 1223 /*
1221 1224 * Current file handle (cfh) should have been set before getting
1222 1225 * into this function. If not, return error.
1223 1226 */
1224 1227 if (cs->vp == NULL) {
1225 1228 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1226 1229 goto out;
1227 1230 }
1228 1231
1229 1232 if (cs->vp->v_type != VDIR) {
1230 1233 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1231 1234 goto out;
1232 1235 }
1233 1236
1234 1237 /*
1235 1238 * Verify the component name. If failed, error out, but
1236 1239 * do not error out if the component name is a "..".
1237 1240 * SECINFO will return its parents secinfo data for SECINFO "..".
1238 1241 */
1239 1242 status = utf8_dir_verify(utfnm);
1240 1243 if (status != NFS4_OK) {
1241 1244 if (utfnm->utf8string_len != 2 ||
1242 1245 utfnm->utf8string_val[0] != '.' ||
1243 1246 utfnm->utf8string_val[1] != '.') {
1244 1247 *cs->statusp = resp->status = status;
1245 1248 goto out;
1246 1249 }
1247 1250 }
1248 1251
1249 1252 nm = utf8_to_str(utfnm, &len, NULL);
1250 1253 if (nm == NULL) {
1251 1254 *cs->statusp = resp->status = NFS4ERR_INVAL;
1252 1255 goto out;
1253 1256 }
1254 1257
1255 1258 if (len > MAXNAMELEN) {
1256 1259 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1257 1260 kmem_free(nm, len);
1258 1261 goto out;
1259 1262 }
1260 1263
1261 1264 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1262 1265 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1263 1266 MAXPATHLEN + 1);
1264 1267
1265 1268 if (name == NULL) {
1266 1269 *cs->statusp = resp->status = NFS4ERR_INVAL;
1267 1270 kmem_free(nm, len);
1268 1271 goto out;
1269 1272 }
1270 1273
1271 1274
1272 1275 *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1273 1276
1274 1277 if (name != nm)
1275 1278 kmem_free(name, MAXPATHLEN + 1);
1276 1279 kmem_free(nm, len);
1277 1280
1278 1281 out:
1279 1282 DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1280 1283 SECINFO4res *, resp);
1281 1284 }
1282 1285
1283 1286 /*
1284 1287 * Free SECINFO result.
1285 1288 */
1286 1289 /* ARGSUSED */
1287 1290 static void
1288 1291 rfs4_op_secinfo_free(nfs_resop4 *resop)
1289 1292 {
1290 1293 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1291 1294 int count, i;
1292 1295 secinfo4 *resok_val;
1293 1296
1294 1297 /* If this is not an Ok result, nothing to free. */
1295 1298 if (resp->status != NFS4_OK) {
1296 1299 return;
1297 1300 }
1298 1301
1299 1302 count = resp->SECINFO4resok_len;
1300 1303 resok_val = resp->SECINFO4resok_val;
1301 1304
1302 1305 for (i = 0; i < count; i++) {
1303 1306 if (resok_val[i].flavor == RPCSEC_GSS) {
1304 1307 rpcsec_gss_info *info;
1305 1308
1306 1309 info = &resok_val[i].flavor_info;
1307 1310 kmem_free(info->oid.sec_oid4_val,
1308 1311 info->oid.sec_oid4_len);
1309 1312 }
1310 1313 }
1311 1314 kmem_free(resok_val, count * sizeof (secinfo4));
1312 1315 resp->SECINFO4resok_len = 0;
1313 1316 resp->SECINFO4resok_val = NULL;
1314 1317 }
1315 1318
1316 1319 /* ARGSUSED */
1317 1320 static void
1318 1321 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1319 1322 struct compound_state *cs)
1320 1323 {
1321 1324 ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1322 1325 ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1323 1326 int error;
1324 1327 vnode_t *vp;
1325 1328 struct vattr va;
1326 1329 int checkwriteperm;
1327 1330 cred_t *cr = cs->cr;
1328 1331 bslabel_t *clabel, *slabel;
1329 1332 ts_label_t *tslabel;
1330 1333 boolean_t admin_low_client;
1331 1334
1332 1335 DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1333 1336 ACCESS4args *, args);
1334 1337
1335 1338 #if 0 /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1336 1339 if (cs->access == CS_ACCESS_DENIED) {
1337 1340 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1338 1341 goto out;
1339 1342 }
1340 1343 #endif
1341 1344 if (cs->vp == NULL) {
1342 1345 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1343 1346 goto out;
1344 1347 }
1345 1348
1346 1349 ASSERT(cr != NULL);
1347 1350
1348 1351 vp = cs->vp;
1349 1352
1350 1353 /*
1351 1354 * If the file system is exported read only, it is not appropriate
1352 1355 * to check write permissions for regular files and directories.
1353 1356 * Special files are interpreted by the client, so the underlying
1354 1357 * permissions are sent back to the client for interpretation.
1355 1358 */
1356 1359 if (rdonly4(req, cs) &&
1357 1360 (vp->v_type == VREG || vp->v_type == VDIR))
1358 1361 checkwriteperm = 0;
1359 1362 else
1360 1363 checkwriteperm = 1;
1361 1364
1362 1365 /*
1363 1366 * XXX
1364 1367 * We need the mode so that we can correctly determine access
1365 1368 * permissions relative to a mandatory lock file. Access to
1366 1369 * mandatory lock files is denied on the server, so it might
1367 1370 * as well be reflected to the server during the open.
1368 1371 */
1369 1372 va.va_mask = AT_MODE;
1370 1373 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1371 1374 if (error) {
1372 1375 *cs->statusp = resp->status = puterrno4(error);
1373 1376 goto out;
1374 1377 }
1375 1378 resp->access = 0;
1376 1379 resp->supported = 0;
1377 1380
1378 1381 if (is_system_labeled()) {
1379 1382 ASSERT(req->rq_label != NULL);
1380 1383 clabel = req->rq_label;
1381 1384 DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1382 1385 "got client label from request(1)",
1383 1386 struct svc_req *, req);
1384 1387 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1385 1388 if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1386 1389 *cs->statusp = resp->status = puterrno4(EACCES);
1387 1390 goto out;
1388 1391 }
1389 1392 slabel = label2bslabel(tslabel);
1390 1393 DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1391 1394 char *, "got server label(1) for vp(2)",
1392 1395 bslabel_t *, slabel, vnode_t *, vp);
1393 1396
1394 1397 admin_low_client = B_FALSE;
1395 1398 } else
1396 1399 admin_low_client = B_TRUE;
1397 1400 }
1398 1401
1399 1402 if (args->access & ACCESS4_READ) {
1400 1403 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1401 1404 if (!error && !MANDLOCK(vp, va.va_mode) &&
1402 1405 (!is_system_labeled() || admin_low_client ||
1403 1406 bldominates(clabel, slabel)))
1404 1407 resp->access |= ACCESS4_READ;
1405 1408 resp->supported |= ACCESS4_READ;
1406 1409 }
1407 1410 if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1408 1411 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1409 1412 if (!error && (!is_system_labeled() || admin_low_client ||
1410 1413 bldominates(clabel, slabel)))
1411 1414 resp->access |= ACCESS4_LOOKUP;
1412 1415 resp->supported |= ACCESS4_LOOKUP;
1413 1416 }
1414 1417 if (checkwriteperm &&
1415 1418 (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1416 1419 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1417 1420 if (!error && !MANDLOCK(vp, va.va_mode) &&
1418 1421 (!is_system_labeled() || admin_low_client ||
1419 1422 blequal(clabel, slabel)))
1420 1423 resp->access |=
1421 1424 (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1422 1425 resp->supported |=
1423 1426 resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1424 1427 }
1425 1428
1426 1429 if (checkwriteperm &&
1427 1430 (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1428 1431 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1429 1432 if (!error && (!is_system_labeled() || admin_low_client ||
1430 1433 blequal(clabel, slabel)))
1431 1434 resp->access |= ACCESS4_DELETE;
1432 1435 resp->supported |= ACCESS4_DELETE;
1433 1436 }
1434 1437 if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1435 1438 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1436 1439 if (!error && !MANDLOCK(vp, va.va_mode) &&
1437 1440 (!is_system_labeled() || admin_low_client ||
1438 1441 bldominates(clabel, slabel)))
1439 1442 resp->access |= ACCESS4_EXECUTE;
1440 1443 resp->supported |= ACCESS4_EXECUTE;
1441 1444 }
1442 1445
1443 1446 if (is_system_labeled() && !admin_low_client)
1444 1447 label_rele(tslabel);
1445 1448
1446 1449 *cs->statusp = resp->status = NFS4_OK;
1447 1450 out:
1448 1451 DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1449 1452 ACCESS4res *, resp);
1450 1453 }
1451 1454
1452 1455 /* ARGSUSED */
1453 1456 static void
1454 1457 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1455 1458 struct compound_state *cs)
1456 1459 {
1457 1460 COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1458 1461 COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1459 1462 int error;
1460 1463 vnode_t *vp = cs->vp;
1461 1464 cred_t *cr = cs->cr;
1462 1465 vattr_t va;
1463 1466 nfs4_srv_t *nsrv4;
1464 1467
1465 1468 DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1466 1469 COMMIT4args *, args);
1467 1470
1468 1471 if (vp == NULL) {
1469 1472 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1470 1473 goto out;
1471 1474 }
1472 1475 if (cs->access == CS_ACCESS_DENIED) {
1473 1476 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1474 1477 goto out;
1475 1478 }
1476 1479
1477 1480 if (args->offset + args->count < args->offset) {
1478 1481 *cs->statusp = resp->status = NFS4ERR_INVAL;
1479 1482 goto out;
1480 1483 }
1481 1484
1482 1485 va.va_mask = AT_UID;
1483 1486 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1484 1487
1485 1488 /*
1486 1489 * If we can't get the attributes, then we can't do the
1487 1490 * right access checking. So, we'll fail the request.
1488 1491 */
1489 1492 if (error) {
1490 1493 *cs->statusp = resp->status = puterrno4(error);
1491 1494 goto out;
1492 1495 }
1493 1496 if (rdonly4(req, cs)) {
1494 1497 *cs->statusp = resp->status = NFS4ERR_ROFS;
1495 1498 goto out;
1496 1499 }
1497 1500
1498 1501 if (vp->v_type != VREG) {
1499 1502 if (vp->v_type == VDIR)
1500 1503 resp->status = NFS4ERR_ISDIR;
1501 1504 else
1502 1505 resp->status = NFS4ERR_INVAL;
1503 1506 *cs->statusp = resp->status;
1504 1507 goto out;
1505 1508 }
1506 1509
1507 1510 if (crgetuid(cr) != va.va_uid &&
1508 1511 (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1509 1512 *cs->statusp = resp->status = puterrno4(error);
|
↓ open down ↓ |
886 lines elided |
↑ open up ↑ |
1510 1513 goto out;
1511 1514 }
1512 1515
1513 1516 error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1514 1517
1515 1518 if (error) {
1516 1519 *cs->statusp = resp->status = puterrno4(error);
1517 1520 goto out;
1518 1521 }
1519 1522
1520 - nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
1523 + nsrv4 = nfs4_get_srv();
1521 1524 *cs->statusp = resp->status = NFS4_OK;
1522 1525 resp->writeverf = nsrv4->write4verf;
1523 1526 out:
1524 1527 DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1525 1528 COMMIT4res *, resp);
1526 1529 }
1527 1530
1528 1531 /*
1529 1532 * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1530 1533 * was completed. It does the nfsv4 create for special files.
1531 1534 */
1532 1535 /* ARGSUSED */
1533 1536 static vnode_t *
1534 1537 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1535 1538 struct compound_state *cs, vattr_t *vap, char *nm)
1536 1539 {
1537 1540 int error;
1538 1541 cred_t *cr = cs->cr;
1539 1542 vnode_t *dvp = cs->vp;
1540 1543 vnode_t *vp = NULL;
1541 1544 int mode;
1542 1545 enum vcexcl excl;
1543 1546
1544 1547 switch (args->type) {
1545 1548 case NF4CHR:
1546 1549 case NF4BLK:
1547 1550 if (secpolicy_sys_devices(cr) != 0) {
1548 1551 *cs->statusp = resp->status = NFS4ERR_PERM;
1549 1552 return (NULL);
1550 1553 }
1551 1554 if (args->type == NF4CHR)
1552 1555 vap->va_type = VCHR;
1553 1556 else
1554 1557 vap->va_type = VBLK;
1555 1558 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1556 1559 args->ftype4_u.devdata.specdata2);
1557 1560 vap->va_mask |= AT_RDEV;
1558 1561 break;
1559 1562 case NF4SOCK:
1560 1563 vap->va_type = VSOCK;
1561 1564 break;
1562 1565 case NF4FIFO:
1563 1566 vap->va_type = VFIFO;
1564 1567 break;
1565 1568 default:
1566 1569 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1567 1570 return (NULL);
1568 1571 }
1569 1572
1570 1573 /*
1571 1574 * Must specify the mode.
1572 1575 */
1573 1576 if (!(vap->va_mask & AT_MODE)) {
1574 1577 *cs->statusp = resp->status = NFS4ERR_INVAL;
1575 1578 return (NULL);
1576 1579 }
1577 1580
1578 1581 excl = EXCL;
1579 1582
1580 1583 mode = 0;
1581 1584
1582 1585 error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1583 1586 if (error) {
1584 1587 *cs->statusp = resp->status = puterrno4(error);
1585 1588 return (NULL);
1586 1589 }
1587 1590 return (vp);
1588 1591 }
1589 1592
1590 1593 /*
1591 1594 * nfsv4 create is used to create non-regular files. For regular files,
1592 1595 * use nfsv4 open.
1593 1596 */
1594 1597 /* ARGSUSED */
1595 1598 static void
1596 1599 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1597 1600 struct compound_state *cs)
1598 1601 {
1599 1602 CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1600 1603 CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1601 1604 int error;
1602 1605 struct vattr bva, iva, iva2, ava, *vap;
1603 1606 cred_t *cr = cs->cr;
1604 1607 vnode_t *dvp = cs->vp;
1605 1608 vnode_t *vp = NULL;
1606 1609 vnode_t *realvp;
1607 1610 char *nm, *lnm;
1608 1611 uint_t len, llen;
1609 1612 int syncval = 0;
1610 1613 struct nfs4_svgetit_arg sarg;
1611 1614 struct nfs4_ntov_table ntov;
1612 1615 struct statvfs64 sb;
1613 1616 nfsstat4 status;
1614 1617 struct sockaddr *ca;
1615 1618 char *name = NULL;
1616 1619 char *lname = NULL;
1617 1620
1618 1621 DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1619 1622 CREATE4args *, args);
1620 1623
1621 1624 resp->attrset = 0;
1622 1625
1623 1626 if (dvp == NULL) {
1624 1627 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1625 1628 goto out;
1626 1629 }
1627 1630
1628 1631 /*
1629 1632 * If there is an unshared filesystem mounted on this vnode,
1630 1633 * do not allow to create an object in this directory.
1631 1634 */
1632 1635 if (vn_ismntpt(dvp)) {
1633 1636 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1634 1637 goto out;
1635 1638 }
1636 1639
1637 1640 /* Verify that type is correct */
1638 1641 switch (args->type) {
1639 1642 case NF4LNK:
1640 1643 case NF4BLK:
1641 1644 case NF4CHR:
1642 1645 case NF4SOCK:
1643 1646 case NF4FIFO:
1644 1647 case NF4DIR:
1645 1648 break;
1646 1649 default:
1647 1650 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1648 1651 goto out;
1649 1652 };
1650 1653
1651 1654 if (cs->access == CS_ACCESS_DENIED) {
1652 1655 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1653 1656 goto out;
1654 1657 }
1655 1658 if (dvp->v_type != VDIR) {
1656 1659 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1657 1660 goto out;
1658 1661 }
1659 1662 status = utf8_dir_verify(&args->objname);
1660 1663 if (status != NFS4_OK) {
1661 1664 *cs->statusp = resp->status = status;
1662 1665 goto out;
1663 1666 }
1664 1667
1665 1668 if (rdonly4(req, cs)) {
1666 1669 *cs->statusp = resp->status = NFS4ERR_ROFS;
1667 1670 goto out;
1668 1671 }
1669 1672
1670 1673 /*
1671 1674 * Name of newly created object
1672 1675 */
1673 1676 nm = utf8_to_fn(&args->objname, &len, NULL);
1674 1677 if (nm == NULL) {
1675 1678 *cs->statusp = resp->status = NFS4ERR_INVAL;
1676 1679 goto out;
1677 1680 }
1678 1681
1679 1682 if (len > MAXNAMELEN) {
1680 1683 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1681 1684 kmem_free(nm, len);
1682 1685 goto out;
1683 1686 }
1684 1687
1685 1688 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1686 1689 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1687 1690 MAXPATHLEN + 1);
1688 1691
1689 1692 if (name == NULL) {
1690 1693 *cs->statusp = resp->status = NFS4ERR_INVAL;
1691 1694 kmem_free(nm, len);
1692 1695 goto out;
1693 1696 }
1694 1697
1695 1698 resp->attrset = 0;
1696 1699
1697 1700 sarg.sbp = &sb;
1698 1701 sarg.is_referral = B_FALSE;
1699 1702 nfs4_ntov_table_init(&ntov);
1700 1703
1701 1704 status = do_rfs4_set_attrs(&resp->attrset,
1702 1705 &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
1703 1706
1704 1707 if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1705 1708 status = NFS4ERR_INVAL;
1706 1709
1707 1710 if (status != NFS4_OK) {
1708 1711 *cs->statusp = resp->status = status;
1709 1712 if (name != nm)
1710 1713 kmem_free(name, MAXPATHLEN + 1);
1711 1714 kmem_free(nm, len);
1712 1715 nfs4_ntov_table_free(&ntov, &sarg);
1713 1716 resp->attrset = 0;
1714 1717 goto out;
1715 1718 }
1716 1719
1717 1720 /* Get "before" change value */
1718 1721 bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
1719 1722 error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
1720 1723 if (error) {
1721 1724 *cs->statusp = resp->status = puterrno4(error);
1722 1725 if (name != nm)
1723 1726 kmem_free(name, MAXPATHLEN + 1);
1724 1727 kmem_free(nm, len);
1725 1728 nfs4_ntov_table_free(&ntov, &sarg);
1726 1729 resp->attrset = 0;
1727 1730 goto out;
1728 1731 }
1729 1732 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1730 1733
1731 1734 vap = sarg.vap;
1732 1735
1733 1736 /*
1734 1737 * Set the default initial values for attributes when the parent
1735 1738 * directory does not have the VSUID/VSGID bit set and they have
1736 1739 * not been specified in createattrs.
1737 1740 */
1738 1741 if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
1739 1742 vap->va_uid = crgetuid(cr);
1740 1743 vap->va_mask |= AT_UID;
1741 1744 }
1742 1745 if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
1743 1746 vap->va_gid = crgetgid(cr);
1744 1747 vap->va_mask |= AT_GID;
1745 1748 }
1746 1749
1747 1750 vap->va_mask |= AT_TYPE;
1748 1751 switch (args->type) {
1749 1752 case NF4DIR:
1750 1753 vap->va_type = VDIR;
1751 1754 if ((vap->va_mask & AT_MODE) == 0) {
1752 1755 vap->va_mode = 0700; /* default: owner rwx only */
1753 1756 vap->va_mask |= AT_MODE;
1754 1757 }
1755 1758 error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1756 1759 if (error)
1757 1760 break;
1758 1761
1759 1762 /*
1760 1763 * Get the initial "after" sequence number, if it fails,
1761 1764 * set to zero
1762 1765 */
1763 1766 iva.va_mask = AT_SEQ;
1764 1767 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1765 1768 iva.va_seq = 0;
1766 1769 break;
1767 1770 case NF4LNK:
1768 1771 vap->va_type = VLNK;
1769 1772 if ((vap->va_mask & AT_MODE) == 0) {
1770 1773 vap->va_mode = 0700; /* default: owner rwx only */
1771 1774 vap->va_mask |= AT_MODE;
1772 1775 }
1773 1776
1774 1777 /*
1775 1778 * symlink names must be treated as data
1776 1779 */
1777 1780 lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
1778 1781 &llen, NULL);
1779 1782
1780 1783 if (lnm == NULL) {
1781 1784 *cs->statusp = resp->status = NFS4ERR_INVAL;
1782 1785 if (name != nm)
1783 1786 kmem_free(name, MAXPATHLEN + 1);
1784 1787 kmem_free(nm, len);
1785 1788 nfs4_ntov_table_free(&ntov, &sarg);
1786 1789 resp->attrset = 0;
1787 1790 goto out;
1788 1791 }
1789 1792
1790 1793 if (llen > MAXPATHLEN) {
1791 1794 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1792 1795 if (name != nm)
1793 1796 kmem_free(name, MAXPATHLEN + 1);
1794 1797 kmem_free(nm, len);
1795 1798 kmem_free(lnm, llen);
1796 1799 nfs4_ntov_table_free(&ntov, &sarg);
1797 1800 resp->attrset = 0;
1798 1801 goto out;
1799 1802 }
1800 1803
1801 1804 lname = nfscmd_convname(ca, cs->exi, lnm,
1802 1805 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1803 1806
1804 1807 if (lname == NULL) {
1805 1808 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
1806 1809 if (name != nm)
1807 1810 kmem_free(name, MAXPATHLEN + 1);
1808 1811 kmem_free(nm, len);
1809 1812 kmem_free(lnm, llen);
1810 1813 nfs4_ntov_table_free(&ntov, &sarg);
1811 1814 resp->attrset = 0;
1812 1815 goto out;
1813 1816 }
1814 1817
1815 1818 error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
1816 1819 if (lname != lnm)
1817 1820 kmem_free(lname, MAXPATHLEN + 1);
1818 1821 kmem_free(lnm, llen);
1819 1822 if (error)
1820 1823 break;
1821 1824
1822 1825 /*
1823 1826 * Get the initial "after" sequence number, if it fails,
1824 1827 * set to zero
1825 1828 */
1826 1829 iva.va_mask = AT_SEQ;
1827 1830 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1828 1831 iva.va_seq = 0;
1829 1832
1830 1833 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
1831 1834 NULL, NULL, NULL);
1832 1835 if (error)
1833 1836 break;
1834 1837
1835 1838 /*
1836 1839 * va_seq is not safe over VOP calls, check it again
1837 1840 * if it has changed zero out iva to force atomic = FALSE.
1838 1841 */
1839 1842 iva2.va_mask = AT_SEQ;
1840 1843 if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
1841 1844 iva2.va_seq != iva.va_seq)
1842 1845 iva.va_seq = 0;
1843 1846 break;
1844 1847 default:
1845 1848 /*
1846 1849 * probably a special file.
1847 1850 */
1848 1851 if ((vap->va_mask & AT_MODE) == 0) {
1849 1852 vap->va_mode = 0600; /* default: owner rw only */
1850 1853 vap->va_mask |= AT_MODE;
1851 1854 }
1852 1855 syncval = FNODSYNC;
1853 1856 /*
1854 1857 * We know this will only generate one VOP call
1855 1858 */
1856 1859 vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
1857 1860
1858 1861 if (vp == NULL) {
1859 1862 if (name != nm)
1860 1863 kmem_free(name, MAXPATHLEN + 1);
1861 1864 kmem_free(nm, len);
1862 1865 nfs4_ntov_table_free(&ntov, &sarg);
1863 1866 resp->attrset = 0;
1864 1867 goto out;
1865 1868 }
1866 1869
1867 1870 /*
1868 1871 * Get the initial "after" sequence number, if it fails,
1869 1872 * set to zero
1870 1873 */
1871 1874 iva.va_mask = AT_SEQ;
1872 1875 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1873 1876 iva.va_seq = 0;
1874 1877
1875 1878 break;
1876 1879 }
1877 1880 if (name != nm)
1878 1881 kmem_free(name, MAXPATHLEN + 1);
1879 1882 kmem_free(nm, len);
1880 1883
1881 1884 if (error) {
1882 1885 *cs->statusp = resp->status = puterrno4(error);
1883 1886 }
1884 1887
1885 1888 /*
1886 1889 * Force modified data and metadata out to stable storage.
1887 1890 */
1888 1891 (void) VOP_FSYNC(dvp, 0, cr, NULL);
1889 1892
1890 1893 if (resp->status != NFS4_OK) {
1891 1894 if (vp != NULL)
1892 1895 VN_RELE(vp);
1893 1896 nfs4_ntov_table_free(&ntov, &sarg);
1894 1897 resp->attrset = 0;
1895 1898 goto out;
1896 1899 }
1897 1900
1898 1901 /*
1899 1902 * Finish setup of cinfo response, "before" value already set.
1900 1903 * Get "after" change value, if it fails, simply return the
1901 1904 * before value.
1902 1905 */
1903 1906 ava.va_mask = AT_CTIME|AT_SEQ;
1904 1907 if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
1905 1908 ava.va_ctime = bva.va_ctime;
1906 1909 ava.va_seq = 0;
1907 1910 }
1908 1911 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
1909 1912
1910 1913 /*
1911 1914 * True verification that object was created with correct
1912 1915 * attrs is impossible. The attrs could have been changed
1913 1916 * immediately after object creation. If attributes did
1914 1917 * not verify, the only recourse for the server is to
1915 1918 * destroy the object. Maybe if some attrs (like gid)
1916 1919 * are set incorrectly, the object should be destroyed;
1917 1920 * however, seems bad as a default policy. Do we really
1918 1921 * want to destroy an object over one of the times not
1919 1922 * verifying correctly? For these reasons, the server
1920 1923 * currently sets bits in attrset for createattrs
1921 1924 * that were set; however, no verification is done.
1922 1925 *
1923 1926 * vmask_to_nmask accounts for vattr bits set on create
1924 1927 * [do_rfs4_set_attrs() only sets resp bits for
1925 1928 * non-vattr/vfs bits.]
1926 1929 * Mask off any bits set by default so as not to return
1927 1930 * more attrset bits than were requested in createattrs
1928 1931 */
1929 1932 nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
1930 1933 resp->attrset &= args->createattrs.attrmask;
1931 1934 nfs4_ntov_table_free(&ntov, &sarg);
1932 1935
1933 1936 error = makefh4(&cs->fh, vp, cs->exi);
1934 1937 if (error) {
1935 1938 *cs->statusp = resp->status = puterrno4(error);
1936 1939 }
1937 1940
1938 1941 /*
1939 1942 * The cinfo.atomic = TRUE only if we got no errors, we have
1940 1943 * non-zero va_seq's, and it has incremented by exactly one
1941 1944 * during the creation and it didn't change during the VOP_LOOKUP
1942 1945 * or VOP_FSYNC.
1943 1946 */
1944 1947 if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
1945 1948 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
1946 1949 resp->cinfo.atomic = TRUE;
1947 1950 else
1948 1951 resp->cinfo.atomic = FALSE;
1949 1952
1950 1953 /*
1951 1954 * Force modified metadata out to stable storage.
1952 1955 *
1953 1956 * if a underlying vp exists, pass it to VOP_FSYNC
1954 1957 */
1955 1958 if (VOP_REALVP(vp, &realvp, NULL) == 0)
1956 1959 (void) VOP_FSYNC(realvp, syncval, cr, NULL);
1957 1960 else
1958 1961 (void) VOP_FSYNC(vp, syncval, cr, NULL);
1959 1962
1960 1963 if (resp->status != NFS4_OK) {
1961 1964 VN_RELE(vp);
1962 1965 goto out;
1963 1966 }
1964 1967 if (cs->vp)
1965 1968 VN_RELE(cs->vp);
1966 1969
1967 1970 cs->vp = vp;
1968 1971 *cs->statusp = resp->status = NFS4_OK;
1969 1972 out:
1970 1973 DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
1971 1974 CREATE4res *, resp);
1972 1975 }
1973 1976
1974 1977 /*ARGSUSED*/
1975 1978 static void
1976 1979 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1977 1980 struct compound_state *cs)
1978 1981 {
1979 1982 DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1980 1983 DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1981 1984
1982 1985 rfs4_op_inval(argop, resop, req, cs);
1983 1986
1984 1987 DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1985 1988 DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1986 1989 }
1987 1990
1988 1991 /*ARGSUSED*/
1989 1992 static void
1990 1993 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1991 1994 struct compound_state *cs)
1992 1995 {
1993 1996 DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1994 1997 DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1995 1998 rfs4_deleg_state_t *dsp;
1996 1999 nfsstat4 status;
1997 2000
1998 2001 DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
1999 2002 DELEGRETURN4args *, args);
2000 2003
2001 2004 status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
2002 2005 resp->status = *cs->statusp = status;
2003 2006 if (status != NFS4_OK)
2004 2007 goto out;
2005 2008
2006 2009 /* Ensure specified filehandle matches */
2007 2010 if (cs->vp != dsp->rds_finfo->rf_vp) {
2008 2011 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
2009 2012 } else
2010 2013 rfs4_return_deleg(dsp, FALSE);
2011 2014
2012 2015 rfs4_update_lease(dsp->rds_client);
2013 2016
2014 2017 rfs4_deleg_state_rele(dsp);
2015 2018 out:
2016 2019 DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
2017 2020 DELEGRETURN4res *, resp);
2018 2021 }
2019 2022
2020 2023 /*
2021 2024 * Check to see if a given "flavor" is an explicitly shared flavor.
2022 2025 * The assumption of this routine is the "flavor" is already a valid
2023 2026 * flavor in the secinfo list of "exi".
2024 2027 *
2025 2028 * e.g.
2026 2029 * # share -o sec=flavor1 /export
2027 2030 * # share -o sec=flavor2 /export/home
2028 2031 *
2029 2032 * flavor2 is not an explicitly shared flavor for /export,
2030 2033 * however it is in the secinfo list for /export thru the
2031 2034 * server namespace setup.
2032 2035 */
2033 2036 int
2034 2037 is_exported_sec(int flavor, struct exportinfo *exi)
2035 2038 {
2036 2039 int i;
2037 2040 struct secinfo *sp;
2038 2041
2039 2042 sp = exi->exi_export.ex_secinfo;
2040 2043 for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2041 2044 if (flavor == sp[i].s_secinfo.sc_nfsnum ||
2042 2045 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
2043 2046 return (SEC_REF_EXPORTED(&sp[i]));
2044 2047 }
2045 2048 }
2046 2049
2047 2050 /* Should not reach this point based on the assumption */
2048 2051 return (0);
2049 2052 }
2050 2053
2051 2054 /*
2052 2055 * Check if the security flavor used in the request matches what is
2053 2056 * required at the export point or at the root pseudo node (exi_root).
2054 2057 *
2055 2058 * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
2056 2059 *
2057 2060 */
2058 2061 static int
2059 2062 secinfo_match_or_authnone(struct compound_state *cs)
2060 2063 {
2061 2064 int i;
2062 2065 struct secinfo *sp;
2063 2066
2064 2067 /*
2065 2068 * Check cs->nfsflavor (from the request) against
2066 2069 * the current export data in cs->exi.
2067 2070 */
2068 2071 sp = cs->exi->exi_export.ex_secinfo;
2069 2072 for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
2070 2073 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
2071 2074 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
2072 2075 return (1);
2073 2076 }
2074 2077
2075 2078 return (0);
2076 2079 }
2077 2080
2078 2081 /*
2079 2082 * Check the access authority for the client and return the correct error.
2080 2083 */
2081 2084 nfsstat4
2082 2085 call_checkauth4(struct compound_state *cs, struct svc_req *req)
2083 2086 {
2084 2087 int authres;
2085 2088
2086 2089 /*
2087 2090 * First, check if the security flavor used in the request
2088 2091 * are among the flavors set in the server namespace.
2089 2092 */
2090 2093 if (!secinfo_match_or_authnone(cs)) {
2091 2094 *cs->statusp = NFS4ERR_WRONGSEC;
2092 2095 return (*cs->statusp);
2093 2096 }
2094 2097
2095 2098 authres = checkauth4(cs, req);
2096 2099
2097 2100 if (authres > 0) {
2098 2101 *cs->statusp = NFS4_OK;
2099 2102 if (! (cs->access & CS_ACCESS_LIMITED))
2100 2103 cs->access = CS_ACCESS_OK;
2101 2104 } else if (authres == 0) {
2102 2105 *cs->statusp = NFS4ERR_ACCESS;
2103 2106 } else if (authres == -2) {
2104 2107 *cs->statusp = NFS4ERR_WRONGSEC;
2105 2108 } else {
2106 2109 *cs->statusp = NFS4ERR_DELAY;
2107 2110 }
2108 2111 return (*cs->statusp);
2109 2112 }
2110 2113
2111 2114 /*
2112 2115 * bitmap4_to_attrmask is called by getattr and readdir.
2113 2116 * It sets up the vattr mask and determines whether vfsstat call is needed
2114 2117 * based on the input bitmap.
2115 2118 * Returns nfsv4 status.
2116 2119 */
2117 2120 static nfsstat4
2118 2121 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2119 2122 {
2120 2123 int i;
2121 2124 uint_t va_mask;
2122 2125 struct statvfs64 *sbp = sargp->sbp;
2123 2126
2124 2127 sargp->sbp = NULL;
2125 2128 sargp->flag = 0;
2126 2129 sargp->rdattr_error = NFS4_OK;
2127 2130 sargp->mntdfid_set = FALSE;
2128 2131 if (sargp->cs->vp)
2129 2132 sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2130 2133 FH4_ATTRDIR | FH4_NAMEDATTR);
2131 2134 else
2132 2135 sargp->xattr = 0;
2133 2136
2134 2137 /*
2135 2138 * Set rdattr_error_req to true if return error per
2136 2139 * failed entry rather than fail the readdir.
2137 2140 */
2138 2141 if (breq & FATTR4_RDATTR_ERROR_MASK)
2139 2142 sargp->rdattr_error_req = 1;
2140 2143 else
2141 2144 sargp->rdattr_error_req = 0;
2142 2145
2143 2146 /*
2144 2147 * generate the va_mask
2145 2148 * Handle the easy cases first
2146 2149 */
2147 2150 switch (breq) {
2148 2151 case NFS4_NTOV_ATTR_MASK:
2149 2152 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2150 2153 return (NFS4_OK);
2151 2154
2152 2155 case NFS4_FS_ATTR_MASK:
2153 2156 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2154 2157 sargp->sbp = sbp;
2155 2158 return (NFS4_OK);
2156 2159
2157 2160 case NFS4_NTOV_ATTR_CACHE_MASK:
2158 2161 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2159 2162 return (NFS4_OK);
2160 2163
2161 2164 case FATTR4_LEASE_TIME_MASK:
2162 2165 sargp->vap->va_mask = 0;
2163 2166 return (NFS4_OK);
2164 2167
2165 2168 default:
2166 2169 va_mask = 0;
2167 2170 for (i = 0; i < nfs4_ntov_map_size; i++) {
2168 2171 if ((breq & nfs4_ntov_map[i].fbit) &&
2169 2172 nfs4_ntov_map[i].vbit)
2170 2173 va_mask |= nfs4_ntov_map[i].vbit;
2171 2174 }
2172 2175
2173 2176 /*
2174 2177 * Check is vfsstat is needed
2175 2178 */
2176 2179 if (breq & NFS4_FS_ATTR_MASK)
2177 2180 sargp->sbp = sbp;
2178 2181
2179 2182 sargp->vap->va_mask = va_mask;
2180 2183 return (NFS4_OK);
2181 2184 }
2182 2185 /* NOTREACHED */
2183 2186 }
2184 2187
2185 2188 /*
2186 2189 * bitmap4_get_sysattrs is called by getattr and readdir.
2187 2190 * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2188 2191 * Returns nfsv4 status.
2189 2192 */
2190 2193 static nfsstat4
2191 2194 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2192 2195 {
2193 2196 int error;
2194 2197 struct compound_state *cs = sargp->cs;
2195 2198 vnode_t *vp = cs->vp;
2196 2199
2197 2200 if (sargp->sbp != NULL) {
2198 2201 if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2199 2202 sargp->sbp = NULL; /* to identify error */
2200 2203 return (puterrno4(error));
2201 2204 }
2202 2205 }
2203 2206
2204 2207 return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2205 2208 }
2206 2209
2207 2210 static void
2208 2211 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2209 2212 {
2210 2213 ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2211 2214 KM_SLEEP);
2212 2215 ntovp->attrcnt = 0;
2213 2216 ntovp->vfsstat = FALSE;
2214 2217 }
2215 2218
2216 2219 static void
2217 2220 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2218 2221 struct nfs4_svgetit_arg *sargp)
2219 2222 {
2220 2223 int i;
2221 2224 union nfs4_attr_u *na;
2222 2225 uint8_t *amap;
2223 2226
2224 2227 /*
2225 2228 * XXX Should do the same checks for whether the bit is set
2226 2229 */
2227 2230 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2228 2231 i < ntovp->attrcnt; i++, na++, amap++) {
2229 2232 (void) (*nfs4_ntov_map[*amap].sv_getit)(
2230 2233 NFS4ATTR_FREEIT, sargp, na);
2231 2234 }
2232 2235 if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2233 2236 /*
2234 2237 * xdr_free for getattr will be done later
2235 2238 */
2236 2239 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2237 2240 i < ntovp->attrcnt; i++, na++, amap++) {
2238 2241 xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2239 2242 }
2240 2243 }
2241 2244 kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2242 2245 }
2243 2246
2244 2247 /*
2245 2248 * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2246 2249 */
2247 2250 static nfsstat4
2248 2251 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2249 2252 struct nfs4_svgetit_arg *sargp)
2250 2253 {
2251 2254 int error = 0;
2252 2255 int i, k;
2253 2256 struct nfs4_ntov_table ntov;
2254 2257 XDR xdr;
2255 2258 ulong_t xdr_size;
2256 2259 char *xdr_attrs;
2257 2260 nfsstat4 status = NFS4_OK;
2258 2261 nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2259 2262 union nfs4_attr_u *na;
2260 2263 uint8_t *amap;
2261 2264
2262 2265 sargp->op = NFS4ATTR_GETIT;
2263 2266 sargp->flag = 0;
2264 2267
2265 2268 fattrp->attrmask = 0;
2266 2269 /* if no bits requested, then return empty fattr4 */
2267 2270 if (breq == 0) {
2268 2271 fattrp->attrlist4_len = 0;
2269 2272 fattrp->attrlist4 = NULL;
2270 2273 return (NFS4_OK);
2271 2274 }
2272 2275
2273 2276 /*
2274 2277 * return NFS4ERR_INVAL when client requests write-only attrs
2275 2278 */
2276 2279 if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2277 2280 return (NFS4ERR_INVAL);
2278 2281
2279 2282 nfs4_ntov_table_init(&ntov);
2280 2283 na = ntov.na;
2281 2284 amap = ntov.amap;
2282 2285
2283 2286 /*
2284 2287 * Now loop to get or verify the attrs
2285 2288 */
2286 2289 for (i = 0; i < nfs4_ntov_map_size; i++) {
2287 2290 if (breq & nfs4_ntov_map[i].fbit) {
2288 2291 if ((*nfs4_ntov_map[i].sv_getit)(
2289 2292 NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2290 2293
2291 2294 error = (*nfs4_ntov_map[i].sv_getit)(
2292 2295 NFS4ATTR_GETIT, sargp, na);
2293 2296
2294 2297 /*
2295 2298 * Possible error values:
2296 2299 * >0 if sv_getit failed to
2297 2300 * get the attr; 0 if succeeded;
2298 2301 * <0 if rdattr_error and the
2299 2302 * attribute cannot be returned.
2300 2303 */
2301 2304 if (error && !(sargp->rdattr_error_req))
2302 2305 goto done;
2303 2306 /*
2304 2307 * If error then just for entry
2305 2308 */
2306 2309 if (error == 0) {
2307 2310 fattrp->attrmask |=
2308 2311 nfs4_ntov_map[i].fbit;
2309 2312 *amap++ =
2310 2313 (uint8_t)nfs4_ntov_map[i].nval;
2311 2314 na++;
2312 2315 (ntov.attrcnt)++;
2313 2316 } else if ((error > 0) &&
2314 2317 (sargp->rdattr_error == NFS4_OK)) {
2315 2318 sargp->rdattr_error = puterrno4(error);
2316 2319 }
2317 2320 error = 0;
2318 2321 }
2319 2322 }
2320 2323 }
2321 2324
2322 2325 /*
2323 2326 * If rdattr_error was set after the return value for it was assigned,
2324 2327 * update it.
2325 2328 */
2326 2329 if (prev_rdattr_error != sargp->rdattr_error) {
2327 2330 na = ntov.na;
2328 2331 amap = ntov.amap;
2329 2332 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2330 2333 k = *amap;
2331 2334 if (k < FATTR4_RDATTR_ERROR) {
2332 2335 continue;
2333 2336 }
2334 2337 if ((k == FATTR4_RDATTR_ERROR) &&
2335 2338 ((*nfs4_ntov_map[k].sv_getit)(
2336 2339 NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2337 2340
2338 2341 (void) (*nfs4_ntov_map[k].sv_getit)(
2339 2342 NFS4ATTR_GETIT, sargp, na);
2340 2343 }
2341 2344 break;
2342 2345 }
2343 2346 }
2344 2347
2345 2348 xdr_size = 0;
2346 2349 na = ntov.na;
2347 2350 amap = ntov.amap;
2348 2351 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2349 2352 xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2350 2353 }
2351 2354
2352 2355 fattrp->attrlist4_len = xdr_size;
2353 2356 if (xdr_size) {
2354 2357 /* freed by rfs4_op_getattr_free() */
2355 2358 fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2356 2359
2357 2360 xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2358 2361
2359 2362 na = ntov.na;
2360 2363 amap = ntov.amap;
2361 2364 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2362 2365 if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2363 2366 DTRACE_PROBE1(nfss__e__getattr4_encfail,
2364 2367 int, *amap);
2365 2368 status = NFS4ERR_SERVERFAULT;
2366 2369 break;
2367 2370 }
2368 2371 }
2369 2372 /* xdrmem_destroy(&xdrs); */ /* NO-OP */
2370 2373 } else {
2371 2374 fattrp->attrlist4 = NULL;
2372 2375 }
2373 2376 done:
2374 2377
2375 2378 nfs4_ntov_table_free(&ntov, sargp);
2376 2379
2377 2380 if (error != 0)
2378 2381 status = puterrno4(error);
2379 2382
2380 2383 return (status);
2381 2384 }
2382 2385
2383 2386 /* ARGSUSED */
2384 2387 static void
2385 2388 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2386 2389 struct compound_state *cs)
2387 2390 {
2388 2391 GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2389 2392 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2390 2393 struct nfs4_svgetit_arg sarg;
2391 2394 struct statvfs64 sb;
2392 2395 nfsstat4 status;
2393 2396
2394 2397 DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2395 2398 GETATTR4args *, args);
2396 2399
2397 2400 if (cs->vp == NULL) {
2398 2401 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2399 2402 goto out;
2400 2403 }
2401 2404
2402 2405 if (cs->access == CS_ACCESS_DENIED) {
2403 2406 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2404 2407 goto out;
2405 2408 }
2406 2409
2407 2410 sarg.sbp = &sb;
2408 2411 sarg.cs = cs;
2409 2412 sarg.is_referral = B_FALSE;
2410 2413
2411 2414 status = bitmap4_to_attrmask(args->attr_request, &sarg);
2412 2415 if (status == NFS4_OK) {
2413 2416
2414 2417 status = bitmap4_get_sysattrs(&sarg);
2415 2418 if (status == NFS4_OK) {
2416 2419
2417 2420 /* Is this a referral? */
2418 2421 if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2419 2422 /* Older V4 Solaris client sees a link */
2420 2423 if (client_is_downrev(req))
2421 2424 sarg.vap->va_type = VLNK;
2422 2425 else
2423 2426 sarg.is_referral = B_TRUE;
2424 2427 }
2425 2428
2426 2429 status = do_rfs4_op_getattr(args->attr_request,
2427 2430 &resp->obj_attributes, &sarg);
2428 2431 }
2429 2432 }
2430 2433 *cs->statusp = resp->status = status;
2431 2434 out:
2432 2435 DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2433 2436 GETATTR4res *, resp);
2434 2437 }
2435 2438
2436 2439 static void
2437 2440 rfs4_op_getattr_free(nfs_resop4 *resop)
2438 2441 {
2439 2442 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2440 2443
2441 2444 nfs4_fattr4_free(&resp->obj_attributes);
2442 2445 }
2443 2446
2444 2447 /* ARGSUSED */
2445 2448 static void
2446 2449 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2447 2450 struct compound_state *cs)
2448 2451 {
2449 2452 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2450 2453
2451 2454 DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2452 2455
2453 2456 if (cs->vp == NULL) {
2454 2457 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2455 2458 goto out;
2456 2459 }
2457 2460 if (cs->access == CS_ACCESS_DENIED) {
2458 2461 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2459 2462 goto out;
2460 2463 }
2461 2464
2462 2465 /* check for reparse point at the share point */
2463 2466 if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2464 2467 /* it's all bad */
2465 2468 cs->exi->exi_moved = 1;
2466 2469 *cs->statusp = resp->status = NFS4ERR_MOVED;
2467 2470 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2468 2471 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2469 2472 return;
2470 2473 }
2471 2474
2472 2475 /* check for reparse point at vp */
2473 2476 if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2474 2477 /* it's not all bad */
2475 2478 *cs->statusp = resp->status = NFS4ERR_MOVED;
2476 2479 DTRACE_PROBE2(nfs4serv__func__referral__moved,
2477 2480 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2478 2481 return;
2479 2482 }
2480 2483
2481 2484 resp->object.nfs_fh4_val =
2482 2485 kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2483 2486 nfs_fh4_copy(&cs->fh, &resp->object);
2484 2487 *cs->statusp = resp->status = NFS4_OK;
2485 2488 out:
2486 2489 DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2487 2490 GETFH4res *, resp);
2488 2491 }
2489 2492
2490 2493 static void
2491 2494 rfs4_op_getfh_free(nfs_resop4 *resop)
2492 2495 {
2493 2496 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2494 2497
2495 2498 if (resp->status == NFS4_OK &&
2496 2499 resp->object.nfs_fh4_val != NULL) {
2497 2500 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2498 2501 resp->object.nfs_fh4_val = NULL;
2499 2502 resp->object.nfs_fh4_len = 0;
2500 2503 }
2501 2504 }
2502 2505
2503 2506 /*
2504 2507 * illegal: args: void
2505 2508 * res : status (NFS4ERR_OP_ILLEGAL)
2506 2509 */
2507 2510 /* ARGSUSED */
2508 2511 static void
2509 2512 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2510 2513 struct svc_req *req, struct compound_state *cs)
2511 2514 {
2512 2515 ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2513 2516
2514 2517 resop->resop = OP_ILLEGAL;
2515 2518 *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2516 2519 }
2517 2520
2518 2521 /*
2519 2522 * link: args: SAVED_FH: file, CURRENT_FH: target directory
2520 2523 * res: status. If success - CURRENT_FH unchanged, return change_info
2521 2524 */
2522 2525 /* ARGSUSED */
2523 2526 static void
2524 2527 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2525 2528 struct compound_state *cs)
2526 2529 {
2527 2530 LINK4args *args = &argop->nfs_argop4_u.oplink;
2528 2531 LINK4res *resp = &resop->nfs_resop4_u.oplink;
2529 2532 int error;
2530 2533 vnode_t *vp;
2531 2534 vnode_t *dvp;
2532 2535 struct vattr bdva, idva, adva;
2533 2536 char *nm;
2534 2537 uint_t len;
2535 2538 struct sockaddr *ca;
2536 2539 char *name = NULL;
2537 2540 nfsstat4 status;
2538 2541
2539 2542 DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2540 2543 LINK4args *, args);
2541 2544
2542 2545 /* SAVED_FH: source object */
2543 2546 vp = cs->saved_vp;
2544 2547 if (vp == NULL) {
2545 2548 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2546 2549 goto out;
2547 2550 }
2548 2551
2549 2552 /* CURRENT_FH: target directory */
2550 2553 dvp = cs->vp;
2551 2554 if (dvp == NULL) {
2552 2555 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2553 2556 goto out;
2554 2557 }
2555 2558
2556 2559 /*
2557 2560 * If there is a non-shared filesystem mounted on this vnode,
2558 2561 * do not allow to link any file in this directory.
2559 2562 */
2560 2563 if (vn_ismntpt(dvp)) {
2561 2564 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2562 2565 goto out;
2563 2566 }
2564 2567
2565 2568 if (cs->access == CS_ACCESS_DENIED) {
2566 2569 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2567 2570 goto out;
2568 2571 }
2569 2572
2570 2573 /* Check source object's type validity */
2571 2574 if (vp->v_type == VDIR) {
2572 2575 *cs->statusp = resp->status = NFS4ERR_ISDIR;
2573 2576 goto out;
2574 2577 }
2575 2578
2576 2579 /* Check target directory's type */
2577 2580 if (dvp->v_type != VDIR) {
2578 2581 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2579 2582 goto out;
2580 2583 }
2581 2584
2582 2585 if (cs->saved_exi != cs->exi) {
2583 2586 *cs->statusp = resp->status = NFS4ERR_XDEV;
2584 2587 goto out;
2585 2588 }
2586 2589
2587 2590 status = utf8_dir_verify(&args->newname);
2588 2591 if (status != NFS4_OK) {
2589 2592 *cs->statusp = resp->status = status;
2590 2593 goto out;
2591 2594 }
2592 2595
2593 2596 nm = utf8_to_fn(&args->newname, &len, NULL);
2594 2597 if (nm == NULL) {
2595 2598 *cs->statusp = resp->status = NFS4ERR_INVAL;
2596 2599 goto out;
2597 2600 }
2598 2601
2599 2602 if (len > MAXNAMELEN) {
2600 2603 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2601 2604 kmem_free(nm, len);
2602 2605 goto out;
2603 2606 }
2604 2607
2605 2608 if (rdonly4(req, cs)) {
2606 2609 *cs->statusp = resp->status = NFS4ERR_ROFS;
2607 2610 kmem_free(nm, len);
2608 2611 goto out;
2609 2612 }
2610 2613
2611 2614 /* Get "before" change value */
2612 2615 bdva.va_mask = AT_CTIME|AT_SEQ;
2613 2616 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2614 2617 if (error) {
2615 2618 *cs->statusp = resp->status = puterrno4(error);
2616 2619 kmem_free(nm, len);
2617 2620 goto out;
2618 2621 }
2619 2622
2620 2623 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2621 2624 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2622 2625 MAXPATHLEN + 1);
2623 2626
2624 2627 if (name == NULL) {
2625 2628 *cs->statusp = resp->status = NFS4ERR_INVAL;
2626 2629 kmem_free(nm, len);
2627 2630 goto out;
2628 2631 }
2629 2632
2630 2633 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2631 2634
2632 2635 error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2633 2636
2634 2637 if (nm != name)
2635 2638 kmem_free(name, MAXPATHLEN + 1);
2636 2639 kmem_free(nm, len);
2637 2640
2638 2641 /*
2639 2642 * Get the initial "after" sequence number, if it fails, set to zero
2640 2643 */
2641 2644 idva.va_mask = AT_SEQ;
2642 2645 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2643 2646 idva.va_seq = 0;
2644 2647
2645 2648 /*
2646 2649 * Force modified data and metadata out to stable storage.
2647 2650 */
2648 2651 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2649 2652 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2650 2653
2651 2654 if (error) {
2652 2655 *cs->statusp = resp->status = puterrno4(error);
2653 2656 goto out;
2654 2657 }
2655 2658
2656 2659 /*
2657 2660 * Get "after" change value, if it fails, simply return the
2658 2661 * before value.
2659 2662 */
2660 2663 adva.va_mask = AT_CTIME|AT_SEQ;
2661 2664 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2662 2665 adva.va_ctime = bdva.va_ctime;
2663 2666 adva.va_seq = 0;
2664 2667 }
2665 2668
2666 2669 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2667 2670
2668 2671 /*
2669 2672 * The cinfo.atomic = TRUE only if we have
2670 2673 * non-zero va_seq's, and it has incremented by exactly one
2671 2674 * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2672 2675 */
2673 2676 if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2674 2677 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2675 2678 resp->cinfo.atomic = TRUE;
2676 2679 else
2677 2680 resp->cinfo.atomic = FALSE;
2678 2681
2679 2682 *cs->statusp = resp->status = NFS4_OK;
2680 2683 out:
2681 2684 DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2682 2685 LINK4res *, resp);
2683 2686 }
2684 2687
2685 2688 /*
2686 2689 * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2687 2690 */
2688 2691
2689 2692 /* ARGSUSED */
2690 2693 static nfsstat4
2691 2694 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2692 2695 {
2693 2696 int error;
2694 2697 int different_export = 0;
2695 2698 vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2696 2699 struct exportinfo *exi = NULL, *pre_exi = NULL;
2697 2700 nfsstat4 stat;
2698 2701 fid_t fid;
2699 2702 int attrdir, dotdot, walk;
2700 2703 bool_t is_newvp = FALSE;
2701 2704
2702 2705 if (cs->vp->v_flag & V_XATTRDIR) {
2703 2706 attrdir = 1;
2704 2707 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2705 2708 } else {
2706 2709 attrdir = 0;
2707 2710 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2708 2711 }
2709 2712
2710 2713 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2711 2714
2712 2715 /*
2713 2716 * If dotdotting, then need to check whether it's
2714 2717 * above the root of a filesystem, or above an
2715 2718 * export point.
2716 2719 */
2717 2720 if (dotdot) {
2718 2721
2719 2722 /*
2720 2723 * If dotdotting at the root of a filesystem, then
2721 2724 * need to traverse back to the mounted-on filesystem
2722 2725 * and do the dotdot lookup there.
2723 2726 */
2724 2727 if ((cs->vp->v_flag & VROOT) || VN_IS_CURZONEROOT(cs->vp)) {
2725 2728
2726 2729 /*
2727 2730 * If at the system root, then can
2728 2731 * go up no further.
2729 2732 */
2730 2733 if (VN_CMP(cs->vp, ZONE_ROOTVP()))
2731 2734 return (puterrno4(ENOENT));
2732 2735
2733 2736 /*
2734 2737 * Traverse back to the mounted-on filesystem
2735 2738 */
2736 2739 cs->vp = untraverse(cs->vp);
2737 2740
2738 2741 /*
2739 2742 * Set the different_export flag so we remember
2740 2743 * to pick up a new exportinfo entry for
2741 2744 * this new filesystem.
2742 2745 */
2743 2746 different_export = 1;
2744 2747 } else {
2745 2748
2746 2749 /*
2747 2750 * If dotdotting above an export point then set
2748 2751 * the different_export to get new export info.
2749 2752 */
2750 2753 different_export = nfs_exported(cs->exi, cs->vp);
2751 2754 }
2752 2755 }
2753 2756
2754 2757 error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2755 2758 NULL, NULL, NULL);
2756 2759 if (error)
2757 2760 return (puterrno4(error));
2758 2761
2759 2762 /*
2760 2763 * If the vnode is in a pseudo filesystem, check whether it is visible.
2761 2764 *
2762 2765 * XXX if the vnode is a symlink and it is not visible in
2763 2766 * a pseudo filesystem, return ENOENT (not following symlink).
2764 2767 * V4 client can not mount such symlink. This is a regression
2765 2768 * from V2/V3.
2766 2769 *
2767 2770 * In the same exported filesystem, if the security flavor used
2768 2771 * is not an explicitly shared flavor, limit the view to the visible
2769 2772 * list entries only. This is not a WRONGSEC case because it's already
2770 2773 * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2771 2774 */
2772 2775 if (!different_export &&
2773 2776 (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2774 2777 cs->access & CS_ACCESS_LIMITED)) {
2775 2778 if (! nfs_visible(cs->exi, vp, &different_export)) {
2776 2779 VN_RELE(vp);
2777 2780 return (puterrno4(ENOENT));
2778 2781 }
2779 2782 }
2780 2783
2781 2784 /*
2782 2785 * If it's a mountpoint, then traverse it.
2783 2786 */
2784 2787 if (vn_ismntpt(vp)) {
2785 2788 pre_exi = cs->exi; /* save pre-traversed exportinfo */
2786 2789 pre_tvp = vp; /* save pre-traversed vnode */
2787 2790
2788 2791 /*
2789 2792 * hold pre_tvp to counteract rele by traverse. We will
2790 2793 * need pre_tvp below if checkexport4 fails
2791 2794 */
2792 2795 VN_HOLD(pre_tvp);
2793 2796 if ((error = traverse(&vp)) != 0) {
2794 2797 VN_RELE(vp);
2795 2798 VN_RELE(pre_tvp);
2796 2799 return (puterrno4(error));
2797 2800 }
2798 2801 different_export = 1;
2799 2802 } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2800 2803 /*
2801 2804 * The vfsp comparison is to handle the case where
2802 2805 * a LOFS mount is shared. lo_lookup traverses mount points,
2803 2806 * and NFS is unaware of local fs transitions because
2804 2807 * v_vfsmountedhere isn't set. For this special LOFS case,
2805 2808 * the dir and the obj returned by lookup will have different
2806 2809 * vfs ptrs.
2807 2810 */
2808 2811 different_export = 1;
2809 2812 }
2810 2813
2811 2814 if (different_export) {
2812 2815
2813 2816 bzero(&fid, sizeof (fid));
2814 2817 fid.fid_len = MAXFIDSZ;
2815 2818 error = vop_fid_pseudo(vp, &fid);
2816 2819 if (error) {
2817 2820 VN_RELE(vp);
2818 2821 if (pre_tvp)
2819 2822 VN_RELE(pre_tvp);
2820 2823 return (puterrno4(error));
2821 2824 }
2822 2825
2823 2826 if (dotdot)
2824 2827 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2825 2828 else
2826 2829 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2827 2830
2828 2831 if (exi == NULL) {
2829 2832 if (pre_tvp) {
2830 2833 /*
2831 2834 * If this vnode is a mounted-on vnode,
2832 2835 * but the mounted-on file system is not
2833 2836 * exported, send back the filehandle for
2834 2837 * the mounted-on vnode, not the root of
2835 2838 * the mounted-on file system.
2836 2839 */
2837 2840 VN_RELE(vp);
2838 2841 vp = pre_tvp;
2839 2842 exi = pre_exi;
2840 2843 } else {
2841 2844 VN_RELE(vp);
2842 2845 return (puterrno4(EACCES));
2843 2846 }
2844 2847 } else if (pre_tvp) {
2845 2848 /* we're done with pre_tvp now. release extra hold */
2846 2849 VN_RELE(pre_tvp);
2847 2850 }
2848 2851
2849 2852 cs->exi = exi;
2850 2853
2851 2854 /*
2852 2855 * Now we do a checkauth4. The reason is that
2853 2856 * this client/user may not have access to the new
2854 2857 * exported file system, and if they do,
2855 2858 * the client/user may be mapped to a different uid.
2856 2859 *
2857 2860 * We start with a new cr, because the checkauth4 done
2858 2861 * in the PUT*FH operation over wrote the cred's uid,
2859 2862 * gid, etc, and we want the real thing before calling
2860 2863 * checkauth4()
2861 2864 */
2862 2865 crfree(cs->cr);
2863 2866 cs->cr = crdup(cs->basecr);
2864 2867
2865 2868 oldvp = cs->vp;
2866 2869 cs->vp = vp;
2867 2870 is_newvp = TRUE;
2868 2871
2869 2872 stat = call_checkauth4(cs, req);
2870 2873 if (stat != NFS4_OK) {
2871 2874 VN_RELE(cs->vp);
2872 2875 cs->vp = oldvp;
2873 2876 return (stat);
2874 2877 }
2875 2878 }
2876 2879
2877 2880 /*
2878 2881 * After various NFS checks, do a label check on the path
2879 2882 * component. The label on this path should either be the
2880 2883 * global zone's label or a zone's label. We are only
2881 2884 * interested in the zone's label because exported files
2882 2885 * in global zone is accessible (though read-only) to
2883 2886 * clients. The exportability/visibility check is already
2884 2887 * done before reaching this code.
2885 2888 */
2886 2889 if (is_system_labeled()) {
2887 2890 bslabel_t *clabel;
2888 2891
2889 2892 ASSERT(req->rq_label != NULL);
2890 2893 clabel = req->rq_label;
2891 2894 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
2892 2895 "got client label from request(1)", struct svc_req *, req);
2893 2896
2894 2897 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2895 2898 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2896 2899 cs->exi)) {
2897 2900 error = EACCES;
2898 2901 goto err_out;
2899 2902 }
2900 2903 } else {
2901 2904 /*
2902 2905 * We grant access to admin_low label clients
2903 2906 * only if the client is trusted, i.e. also
2904 2907 * running Solaris Trusted Extension.
2905 2908 */
2906 2909 struct sockaddr *ca;
2907 2910 int addr_type;
2908 2911 void *ipaddr;
2909 2912 tsol_tpc_t *tp;
2910 2913
2911 2914 ca = (struct sockaddr *)svc_getrpccaller(
2912 2915 req->rq_xprt)->buf;
2913 2916 if (ca->sa_family == AF_INET) {
2914 2917 addr_type = IPV4_VERSION;
2915 2918 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
2916 2919 } else if (ca->sa_family == AF_INET6) {
2917 2920 addr_type = IPV6_VERSION;
2918 2921 ipaddr = &((struct sockaddr_in6 *)
2919 2922 ca)->sin6_addr;
2920 2923 }
2921 2924 tp = find_tpc(ipaddr, addr_type, B_FALSE);
2922 2925 if (tp == NULL || tp->tpc_tp.tp_doi !=
2923 2926 l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
2924 2927 SUN_CIPSO) {
2925 2928 if (tp != NULL)
2926 2929 TPC_RELE(tp);
2927 2930 error = EACCES;
2928 2931 goto err_out;
2929 2932 }
2930 2933 TPC_RELE(tp);
2931 2934 }
2932 2935 }
2933 2936
2934 2937 error = makefh4(&cs->fh, vp, cs->exi);
2935 2938
2936 2939 err_out:
2937 2940 if (error) {
2938 2941 if (is_newvp) {
2939 2942 VN_RELE(cs->vp);
2940 2943 cs->vp = oldvp;
2941 2944 } else
2942 2945 VN_RELE(vp);
2943 2946 return (puterrno4(error));
2944 2947 }
2945 2948
2946 2949 if (!is_newvp) {
2947 2950 if (cs->vp)
2948 2951 VN_RELE(cs->vp);
2949 2952 cs->vp = vp;
2950 2953 } else if (oldvp)
2951 2954 VN_RELE(oldvp);
2952 2955
2953 2956 /*
2954 2957 * if did lookup on attrdir and didn't lookup .., set named
2955 2958 * attr fh flag
2956 2959 */
2957 2960 if (attrdir && ! dotdot)
2958 2961 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2959 2962
2960 2963 /* Assume false for now, open proc will set this */
2961 2964 cs->mandlock = FALSE;
2962 2965
2963 2966 return (NFS4_OK);
2964 2967 }
2965 2968
2966 2969 /* ARGSUSED */
2967 2970 static void
2968 2971 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2969 2972 struct compound_state *cs)
2970 2973 {
2971 2974 LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2972 2975 LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2973 2976 char *nm;
2974 2977 uint_t len;
2975 2978 struct sockaddr *ca;
2976 2979 char *name = NULL;
2977 2980 nfsstat4 status;
2978 2981
2979 2982 DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2980 2983 LOOKUP4args *, args);
2981 2984
2982 2985 if (cs->vp == NULL) {
2983 2986 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2984 2987 goto out;
2985 2988 }
2986 2989
2987 2990 if (cs->vp->v_type == VLNK) {
2988 2991 *cs->statusp = resp->status = NFS4ERR_SYMLINK;
2989 2992 goto out;
2990 2993 }
2991 2994
2992 2995 if (cs->vp->v_type != VDIR) {
2993 2996 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2994 2997 goto out;
2995 2998 }
2996 2999
2997 3000 status = utf8_dir_verify(&args->objname);
2998 3001 if (status != NFS4_OK) {
2999 3002 *cs->statusp = resp->status = status;
3000 3003 goto out;
3001 3004 }
3002 3005
3003 3006 nm = utf8_to_str(&args->objname, &len, NULL);
3004 3007 if (nm == NULL) {
3005 3008 *cs->statusp = resp->status = NFS4ERR_INVAL;
3006 3009 goto out;
3007 3010 }
3008 3011
3009 3012 if (len > MAXNAMELEN) {
3010 3013 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
3011 3014 kmem_free(nm, len);
3012 3015 goto out;
3013 3016 }
3014 3017
3015 3018 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3016 3019 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
3017 3020 MAXPATHLEN + 1);
3018 3021
3019 3022 if (name == NULL) {
3020 3023 *cs->statusp = resp->status = NFS4ERR_INVAL;
3021 3024 kmem_free(nm, len);
3022 3025 goto out;
3023 3026 }
3024 3027
3025 3028 *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
3026 3029
3027 3030 if (name != nm)
3028 3031 kmem_free(name, MAXPATHLEN + 1);
3029 3032 kmem_free(nm, len);
3030 3033
3031 3034 out:
3032 3035 DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
3033 3036 LOOKUP4res *, resp);
3034 3037 }
3035 3038
3036 3039 /* ARGSUSED */
3037 3040 static void
3038 3041 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3039 3042 struct compound_state *cs)
3040 3043 {
3041 3044 LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
3042 3045
3043 3046 DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
3044 3047
3045 3048 if (cs->vp == NULL) {
3046 3049 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3047 3050 goto out;
3048 3051 }
3049 3052
3050 3053 if (cs->vp->v_type != VDIR) {
3051 3054 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
3052 3055 goto out;
3053 3056 }
3054 3057
3055 3058 *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
3056 3059
3057 3060 /*
3058 3061 * From NFSV4 Specification, LOOKUPP should not check for
3059 3062 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
3060 3063 */
3061 3064 if (resp->status == NFS4ERR_WRONGSEC) {
3062 3065 *cs->statusp = resp->status = NFS4_OK;
3063 3066 }
3064 3067
3065 3068 out:
3066 3069 DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
3067 3070 LOOKUPP4res *, resp);
3068 3071 }
3069 3072
3070 3073
3071 3074 /*ARGSUSED2*/
3072 3075 static void
3073 3076 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3074 3077 struct compound_state *cs)
3075 3078 {
3076 3079 OPENATTR4args *args = &argop->nfs_argop4_u.opopenattr;
3077 3080 OPENATTR4res *resp = &resop->nfs_resop4_u.opopenattr;
3078 3081 vnode_t *avp = NULL;
3079 3082 int lookup_flags = LOOKUP_XATTR, error;
3080 3083 int exp_ro = 0;
3081 3084
3082 3085 DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3083 3086 OPENATTR4args *, args);
3084 3087
3085 3088 if (cs->vp == NULL) {
3086 3089 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3087 3090 goto out;
3088 3091 }
3089 3092
3090 3093 if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3091 3094 !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3092 3095 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3093 3096 goto out;
3094 3097 }
3095 3098
3096 3099 /*
3097 3100 * If file system supports passing ACE mask to VOP_ACCESS then
3098 3101 * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3099 3102 */
3100 3103
3101 3104 if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3102 3105 error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3103 3106 V_ACE_MASK, cs->cr, NULL);
3104 3107 else
3105 3108 error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3106 3109 (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3107 3110 (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3108 3111
3109 3112 if (error) {
3110 3113 *cs->statusp = resp->status = puterrno4(EACCES);
3111 3114 goto out;
3112 3115 }
3113 3116
3114 3117 /*
3115 3118 * The CREATE_XATTR_DIR VOP flag cannot be specified if
3116 3119 * the file system is exported read-only -- regardless of
3117 3120 * createdir flag. Otherwise the attrdir would be created
3118 3121 * (assuming server fs isn't mounted readonly locally). If
3119 3122 * VOP_LOOKUP returns ENOENT in this case, the error will
3120 3123 * be translated into EROFS. ENOSYS is mapped to ENOTSUP
3121 3124 * because specfs has no VOP_LOOKUP op, so the macro would
3122 3125 * return ENOSYS. EINVAL is returned by all (current)
3123 3126 * Solaris file system implementations when any of their
3124 3127 * restrictions are violated (xattr(dir) can't have xattrdir).
3125 3128 * Returning NOTSUPP is more appropriate in this case
3126 3129 * because the object will never be able to have an attrdir.
3127 3130 */
3128 3131 if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3129 3132 lookup_flags |= CREATE_XATTR_DIR;
3130 3133
3131 3134 error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3132 3135 NULL, NULL, NULL);
3133 3136
3134 3137 if (error) {
3135 3138 if (error == ENOENT && args->createdir && exp_ro)
3136 3139 *cs->statusp = resp->status = puterrno4(EROFS);
3137 3140 else if (error == EINVAL || error == ENOSYS)
3138 3141 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3139 3142 else
3140 3143 *cs->statusp = resp->status = puterrno4(error);
3141 3144 goto out;
3142 3145 }
3143 3146
3144 3147 ASSERT(avp->v_flag & V_XATTRDIR);
3145 3148
3146 3149 error = makefh4(&cs->fh, avp, cs->exi);
3147 3150
3148 3151 if (error) {
3149 3152 VN_RELE(avp);
3150 3153 *cs->statusp = resp->status = puterrno4(error);
3151 3154 goto out;
3152 3155 }
3153 3156
3154 3157 VN_RELE(cs->vp);
3155 3158 cs->vp = avp;
3156 3159
3157 3160 /*
3158 3161 * There is no requirement for an attrdir fh flag
3159 3162 * because the attrdir has a vnode flag to distinguish
3160 3163 * it from regular (non-xattr) directories. The
3161 3164 * FH4_ATTRDIR flag is set for future sanity checks.
3162 3165 */
3163 3166 set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3164 3167 *cs->statusp = resp->status = NFS4_OK;
3165 3168
3166 3169 out:
3167 3170 DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3168 3171 OPENATTR4res *, resp);
3169 3172 }
3170 3173
3171 3174 static int
3172 3175 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3173 3176 caller_context_t *ct)
3174 3177 {
3175 3178 int error;
3176 3179 int i;
3177 3180 clock_t delaytime;
3178 3181
3179 3182 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3180 3183
3181 3184 /*
3182 3185 * Don't block on mandatory locks. If this routine returns
3183 3186 * EAGAIN, the caller should return NFS4ERR_LOCKED.
3184 3187 */
3185 3188 uio->uio_fmode = FNONBLOCK;
3186 3189
3187 3190 for (i = 0; i < rfs4_maxlock_tries; i++) {
3188 3191
3189 3192
3190 3193 if (direction == FREAD) {
3191 3194 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3192 3195 error = VOP_READ(vp, uio, ioflag, cred, ct);
3193 3196 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3194 3197 } else {
3195 3198 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3196 3199 error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3197 3200 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3198 3201 }
3199 3202
3200 3203 if (error != EAGAIN)
3201 3204 break;
3202 3205
3203 3206 if (i < rfs4_maxlock_tries - 1) {
3204 3207 delay(delaytime);
3205 3208 delaytime *= 2;
3206 3209 }
3207 3210 }
3208 3211
3209 3212 return (error);
3210 3213 }
3211 3214
3212 3215 /* ARGSUSED */
3213 3216 static void
3214 3217 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3215 3218 struct compound_state *cs)
3216 3219 {
3217 3220 READ4args *args = &argop->nfs_argop4_u.opread;
3218 3221 READ4res *resp = &resop->nfs_resop4_u.opread;
3219 3222 int error;
3220 3223 int verror;
3221 3224 vnode_t *vp;
3222 3225 struct vattr va;
3223 3226 struct iovec iov, *iovp = NULL;
3224 3227 int iovcnt;
3225 3228 struct uio uio;
3226 3229 u_offset_t offset;
3227 3230 bool_t *deleg = &cs->deleg;
3228 3231 nfsstat4 stat;
3229 3232 int in_crit = 0;
3230 3233 mblk_t *mp = NULL;
3231 3234 int alloc_err = 0;
3232 3235 int rdma_used = 0;
3233 3236 int loaned_buffers;
3234 3237 caller_context_t ct;
3235 3238 struct uio *uiop;
3236 3239
3237 3240 DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3238 3241 READ4args, args);
3239 3242
3240 3243 vp = cs->vp;
3241 3244 if (vp == NULL) {
3242 3245 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3243 3246 goto out;
3244 3247 }
3245 3248 if (cs->access == CS_ACCESS_DENIED) {
3246 3249 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3247 3250 goto out;
3248 3251 }
3249 3252
3250 3253 if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3251 3254 deleg, TRUE, &ct)) != NFS4_OK) {
3252 3255 *cs->statusp = resp->status = stat;
3253 3256 goto out;
3254 3257 }
3255 3258
3256 3259 /*
3257 3260 * Enter the critical region before calling VOP_RWLOCK
3258 3261 * to avoid a deadlock with write requests.
3259 3262 */
3260 3263 if (nbl_need_check(vp)) {
3261 3264 nbl_start_crit(vp, RW_READER);
3262 3265 in_crit = 1;
3263 3266 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3264 3267 &ct)) {
3265 3268 *cs->statusp = resp->status = NFS4ERR_LOCKED;
3266 3269 goto out;
3267 3270 }
3268 3271 }
3269 3272
3270 3273 if (args->wlist) {
3271 3274 if (args->count > clist_len(args->wlist)) {
3272 3275 *cs->statusp = resp->status = NFS4ERR_INVAL;
3273 3276 goto out;
3274 3277 }
3275 3278 rdma_used = 1;
3276 3279 }
3277 3280
3278 3281 /* use loaned buffers for TCP */
3279 3282 loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3280 3283
3281 3284 va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3282 3285 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3283 3286
3284 3287 /*
3285 3288 * If we can't get the attributes, then we can't do the
3286 3289 * right access checking. So, we'll fail the request.
3287 3290 */
3288 3291 if (verror) {
3289 3292 *cs->statusp = resp->status = puterrno4(verror);
3290 3293 goto out;
3291 3294 }
3292 3295
3293 3296 if (vp->v_type != VREG) {
3294 3297 *cs->statusp = resp->status =
3295 3298 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3296 3299 goto out;
3297 3300 }
3298 3301
3299 3302 if (crgetuid(cs->cr) != va.va_uid &&
3300 3303 (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3301 3304 (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3302 3305 *cs->statusp = resp->status = puterrno4(error);
3303 3306 goto out;
3304 3307 }
3305 3308
3306 3309 if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3307 3310 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3308 3311 goto out;
3309 3312 }
3310 3313
3311 3314 offset = args->offset;
3312 3315 if (offset >= va.va_size) {
3313 3316 *cs->statusp = resp->status = NFS4_OK;
3314 3317 resp->eof = TRUE;
3315 3318 resp->data_len = 0;
3316 3319 resp->data_val = NULL;
3317 3320 resp->mblk = NULL;
3318 3321 /* RDMA */
3319 3322 resp->wlist = args->wlist;
3320 3323 resp->wlist_len = resp->data_len;
3321 3324 *cs->statusp = resp->status = NFS4_OK;
3322 3325 if (resp->wlist)
3323 3326 clist_zero_len(resp->wlist);
3324 3327 goto out;
3325 3328 }
3326 3329
3327 3330 if (args->count == 0) {
3328 3331 *cs->statusp = resp->status = NFS4_OK;
3329 3332 resp->eof = FALSE;
3330 3333 resp->data_len = 0;
3331 3334 resp->data_val = NULL;
3332 3335 resp->mblk = NULL;
3333 3336 /* RDMA */
3334 3337 resp->wlist = args->wlist;
3335 3338 resp->wlist_len = resp->data_len;
3336 3339 if (resp->wlist)
3337 3340 clist_zero_len(resp->wlist);
3338 3341 goto out;
3339 3342 }
3340 3343
3341 3344 /*
3342 3345 * Do not allocate memory more than maximum allowed
3343 3346 * transfer size
3344 3347 */
3345 3348 if (args->count > rfs4_tsize(req))
3346 3349 args->count = rfs4_tsize(req);
3347 3350
3348 3351 if (loaned_buffers) {
3349 3352 uiop = (uio_t *)rfs_setup_xuio(vp);
3350 3353 ASSERT(uiop != NULL);
3351 3354 uiop->uio_segflg = UIO_SYSSPACE;
3352 3355 uiop->uio_loffset = args->offset;
3353 3356 uiop->uio_resid = args->count;
3354 3357
3355 3358 /* Jump to do the read if successful */
3356 3359 if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3357 3360 /*
3358 3361 * Need to hold the vnode until after VOP_RETZCBUF()
3359 3362 * is called.
3360 3363 */
3361 3364 VN_HOLD(vp);
3362 3365 goto doio_read;
3363 3366 }
3364 3367
3365 3368 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3366 3369 uiop->uio_loffset, int, uiop->uio_resid);
3367 3370
3368 3371 uiop->uio_extflg = 0;
3369 3372
3370 3373 /* failure to setup for zero copy */
3371 3374 rfs_free_xuio((void *)uiop);
3372 3375 loaned_buffers = 0;
3373 3376 }
3374 3377
3375 3378 /*
3376 3379 * If returning data via RDMA Write, then grab the chunk list. If we
3377 3380 * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3378 3381 */
3379 3382 if (rdma_used) {
3380 3383 mp = NULL;
3381 3384 (void) rdma_get_wchunk(req, &iov, args->wlist);
3382 3385 uio.uio_iov = &iov;
3383 3386 uio.uio_iovcnt = 1;
3384 3387 } else {
3385 3388 /*
3386 3389 * mp will contain the data to be sent out in the read reply.
3387 3390 * It will be freed after the reply has been sent.
3388 3391 */
3389 3392 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
3390 3393 ASSERT(mp != NULL);
3391 3394 ASSERT(alloc_err == 0);
3392 3395 uio.uio_iov = iovp;
3393 3396 uio.uio_iovcnt = iovcnt;
3394 3397 }
3395 3398
3396 3399 uio.uio_segflg = UIO_SYSSPACE;
3397 3400 uio.uio_extflg = UIO_COPY_CACHED;
3398 3401 uio.uio_loffset = args->offset;
3399 3402 uio.uio_resid = args->count;
3400 3403 uiop = &uio;
3401 3404
3402 3405 doio_read:
3403 3406 error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3404 3407
3405 3408 va.va_mask = AT_SIZE;
3406 3409 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3407 3410
3408 3411 if (error) {
3409 3412 if (mp)
3410 3413 freemsg(mp);
3411 3414 *cs->statusp = resp->status = puterrno4(error);
3412 3415 goto out;
3413 3416 }
3414 3417
3415 3418 /* make mblk using zc buffers */
3416 3419 if (loaned_buffers) {
3417 3420 mp = uio_to_mblk(uiop);
3418 3421 ASSERT(mp != NULL);
3419 3422 }
3420 3423
3421 3424 *cs->statusp = resp->status = NFS4_OK;
3422 3425
3423 3426 ASSERT(uiop->uio_resid >= 0);
3424 3427 resp->data_len = args->count - uiop->uio_resid;
3425 3428 if (mp) {
3426 3429 resp->data_val = (char *)mp->b_datap->db_base;
3427 3430 rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3428 3431 } else {
3429 3432 resp->data_val = (caddr_t)iov.iov_base;
3430 3433 }
3431 3434
3432 3435 resp->mblk = mp;
3433 3436
3434 3437 if (!verror && offset + resp->data_len == va.va_size)
3435 3438 resp->eof = TRUE;
3436 3439 else
3437 3440 resp->eof = FALSE;
3438 3441
3439 3442 if (rdma_used) {
3440 3443 if (!rdma_setup_read_data4(args, resp)) {
3441 3444 *cs->statusp = resp->status = NFS4ERR_INVAL;
3442 3445 }
3443 3446 } else {
3444 3447 resp->wlist = NULL;
3445 3448 }
3446 3449
3447 3450 out:
3448 3451 if (in_crit)
3449 3452 nbl_end_crit(vp);
3450 3453
3451 3454 if (iovp != NULL)
3452 3455 kmem_free(iovp, iovcnt * sizeof (struct iovec));
3453 3456
3454 3457 DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3455 3458 READ4res *, resp);
3456 3459 }
3457 3460
3458 3461 static void
3459 3462 rfs4_op_read_free(nfs_resop4 *resop)
3460 3463 {
3461 3464 READ4res *resp = &resop->nfs_resop4_u.opread;
3462 3465
3463 3466 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3464 3467 freemsg(resp->mblk);
3465 3468 resp->mblk = NULL;
3466 3469 resp->data_val = NULL;
3467 3470 resp->data_len = 0;
3468 3471 }
3469 3472 }
3470 3473
3471 3474 static void
3472 3475 rfs4_op_readdir_free(nfs_resop4 * resop)
3473 3476 {
3474 3477 READDIR4res *resp = &resop->nfs_resop4_u.opreaddir;
3475 3478
3476 3479 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3477 3480 freeb(resp->mblk);
3478 3481 resp->mblk = NULL;
3479 3482 resp->data_len = 0;
3480 3483 }
3481 3484 }
3482 3485
3483 3486
3484 3487 /* ARGSUSED */
3485 3488 static void
3486 3489 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3487 3490 struct compound_state *cs)
3488 3491 {
3489 3492 PUTPUBFH4res *resp = &resop->nfs_resop4_u.opputpubfh;
3490 3493 int error;
3491 3494 vnode_t *vp;
3492 3495 struct exportinfo *exi, *sav_exi;
3493 3496 nfs_fh4_fmt_t *fh_fmtp;
3494 3497 nfs_export_t *ne = nfs_get_export();
3495 3498
3496 3499 DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3497 3500
3498 3501 if (cs->vp) {
3499 3502 VN_RELE(cs->vp);
3500 3503 cs->vp = NULL;
3501 3504 }
3502 3505
3503 3506 if (cs->cr)
3504 3507 crfree(cs->cr);
3505 3508
3506 3509 cs->cr = crdup(cs->basecr);
3507 3510
3508 3511 vp = ne->exi_public->exi_vp;
3509 3512 if (vp == NULL) {
3510 3513 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3511 3514 goto out;
3512 3515 }
3513 3516
3514 3517 error = makefh4(&cs->fh, vp, ne->exi_public);
3515 3518 if (error != 0) {
3516 3519 *cs->statusp = resp->status = puterrno4(error);
3517 3520 goto out;
3518 3521 }
3519 3522 sav_exi = cs->exi;
3520 3523 if (ne->exi_public == ne->exi_root) {
3521 3524 /*
3522 3525 * No filesystem is actually shared public, so we default
3523 3526 * to exi_root. In this case, we must check whether root
3524 3527 * is exported.
3525 3528 */
3526 3529 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3527 3530
3528 3531 /*
3529 3532 * if root filesystem is exported, the exportinfo struct that we
3530 3533 * should use is what checkexport4 returns, because root_exi is
3531 3534 * actually a mostly empty struct.
3532 3535 */
3533 3536 exi = checkexport4(&fh_fmtp->fh4_fsid,
3534 3537 (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3535 3538 cs->exi = ((exi != NULL) ? exi : ne->exi_public);
3536 3539 } else {
3537 3540 /*
3538 3541 * it's a properly shared filesystem
3539 3542 */
3540 3543 cs->exi = ne->exi_public;
3541 3544 }
3542 3545
3543 3546 if (is_system_labeled()) {
3544 3547 bslabel_t *clabel;
3545 3548
3546 3549 ASSERT(req->rq_label != NULL);
3547 3550 clabel = req->rq_label;
3548 3551 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3549 3552 "got client label from request(1)",
3550 3553 struct svc_req *, req);
3551 3554 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3552 3555 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3553 3556 cs->exi)) {
3554 3557 *cs->statusp = resp->status =
3555 3558 NFS4ERR_SERVERFAULT;
3556 3559 goto out;
3557 3560 }
3558 3561 }
3559 3562 }
3560 3563
3561 3564 VN_HOLD(vp);
3562 3565 cs->vp = vp;
3563 3566
3564 3567 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3565 3568 VN_RELE(cs->vp);
3566 3569 cs->vp = NULL;
3567 3570 cs->exi = sav_exi;
3568 3571 goto out;
3569 3572 }
3570 3573
3571 3574 *cs->statusp = resp->status = NFS4_OK;
3572 3575 out:
3573 3576 DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3574 3577 PUTPUBFH4res *, resp);
3575 3578 }
3576 3579
3577 3580 /*
3578 3581 * XXX - issue with put*fh operations. Suppose /export/home is exported.
3579 3582 * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3580 3583 * or joe have restrictive search permissions, then we shouldn't let
3581 3584 * the client get a file handle. This is easy to enforce. However, we
3582 3585 * don't know what security flavor should be used until we resolve the
3583 3586 * path name. Another complication is uid mapping. If root is
3584 3587 * the user, then it will be mapped to the anonymous user by default,
3585 3588 * but we won't know that till we've resolved the path name. And we won't
3586 3589 * know what the anonymous user is.
3587 3590 * Luckily, SECINFO is specified to take a full filename.
3588 3591 * So what we will have to in rfs4_op_lookup is check that flavor of
3589 3592 * the target object matches that of the request, and if root was the
3590 3593 * caller, check for the root= and anon= options, and if necessary,
3591 3594 * repeat the lookup using the right cred_t. But that's not done yet.
3592 3595 */
3593 3596 /* ARGSUSED */
3594 3597 static void
3595 3598 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3596 3599 struct compound_state *cs)
3597 3600 {
3598 3601 PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3599 3602 PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3600 3603 nfs_fh4_fmt_t *fh_fmtp;
3601 3604
3602 3605 DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3603 3606 PUTFH4args *, args);
3604 3607
3605 3608 if (cs->vp) {
3606 3609 VN_RELE(cs->vp);
3607 3610 cs->vp = NULL;
3608 3611 }
3609 3612
3610 3613 if (cs->cr) {
3611 3614 crfree(cs->cr);
3612 3615 cs->cr = NULL;
3613 3616 }
3614 3617
3615 3618
3616 3619 if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3617 3620 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3618 3621 goto out;
3619 3622 }
3620 3623
3621 3624 fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3622 3625 cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3623 3626 NULL);
3624 3627
3625 3628 if (cs->exi == NULL) {
3626 3629 *cs->statusp = resp->status = NFS4ERR_STALE;
3627 3630 goto out;
3628 3631 }
3629 3632
3630 3633 cs->cr = crdup(cs->basecr);
3631 3634
3632 3635 ASSERT(cs->cr != NULL);
3633 3636
3634 3637 if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3635 3638 *cs->statusp = resp->status;
3636 3639 goto out;
3637 3640 }
3638 3641
3639 3642 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3640 3643 VN_RELE(cs->vp);
3641 3644 cs->vp = NULL;
3642 3645 goto out;
3643 3646 }
3644 3647
3645 3648 nfs_fh4_copy(&args->object, &cs->fh);
3646 3649 *cs->statusp = resp->status = NFS4_OK;
3647 3650 cs->deleg = FALSE;
3648 3651
3649 3652 out:
3650 3653 DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3651 3654 PUTFH4res *, resp);
3652 3655 }
3653 3656
3654 3657 /* ARGSUSED */
3655 3658 static void
3656 3659 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3657 3660 struct compound_state *cs)
3658 3661 {
3659 3662 PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3660 3663 int error;
3661 3664 fid_t fid;
3662 3665 struct exportinfo *exi, *sav_exi;
3663 3666
3664 3667 DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3665 3668
3666 3669 if (cs->vp) {
3667 3670 VN_RELE(cs->vp);
3668 3671 cs->vp = NULL;
3669 3672 }
3670 3673
3671 3674 if (cs->cr)
3672 3675 crfree(cs->cr);
3673 3676
3674 3677 cs->cr = crdup(cs->basecr);
3675 3678
3676 3679 /*
3677 3680 * Using rootdir, the system root vnode,
3678 3681 * get its fid.
3679 3682 */
3680 3683 bzero(&fid, sizeof (fid));
3681 3684 fid.fid_len = MAXFIDSZ;
3682 3685 error = vop_fid_pseudo(ZONE_ROOTVP(), &fid);
3683 3686 if (error != 0) {
3684 3687 *cs->statusp = resp->status = puterrno4(error);
3685 3688 goto out;
3686 3689 }
3687 3690
3688 3691 /*
3689 3692 * Then use the root fsid & fid it to find out if it's exported
3690 3693 *
3691 3694 * If the server root isn't exported directly, then
3692 3695 * it should at least be a pseudo export based on
3693 3696 * one or more exports further down in the server's
3694 3697 * file tree.
3695 3698 */
3696 3699 exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL);
3697 3700 if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3698 3701 NFS4_DEBUG(rfs4_debug,
3699 3702 (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3700 3703 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3701 3704 goto out;
3702 3705 }
3703 3706
3704 3707 /*
3705 3708 * Now make a filehandle based on the root
3706 3709 * export and root vnode.
3707 3710 */
3708 3711 error = makefh4(&cs->fh, ZONE_ROOTVP(), exi);
3709 3712 if (error != 0) {
3710 3713 *cs->statusp = resp->status = puterrno4(error);
3711 3714 goto out;
3712 3715 }
3713 3716
3714 3717 sav_exi = cs->exi;
3715 3718 cs->exi = exi;
3716 3719
3717 3720 VN_HOLD(ZONE_ROOTVP());
3718 3721 cs->vp = ZONE_ROOTVP();
3719 3722
3720 3723 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3721 3724 VN_RELE(cs->vp);
3722 3725 cs->vp = NULL;
3723 3726 cs->exi = sav_exi;
3724 3727 goto out;
3725 3728 }
3726 3729
3727 3730 *cs->statusp = resp->status = NFS4_OK;
3728 3731 cs->deleg = FALSE;
3729 3732 out:
3730 3733 DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3731 3734 PUTROOTFH4res *, resp);
3732 3735 }
3733 3736
3734 3737 /*
3735 3738 * readlink: args: CURRENT_FH.
3736 3739 * res: status. If success - CURRENT_FH unchanged, return linktext.
3737 3740 */
3738 3741
3739 3742 /* ARGSUSED */
3740 3743 static void
3741 3744 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3742 3745 struct compound_state *cs)
3743 3746 {
3744 3747 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3745 3748 int error;
3746 3749 vnode_t *vp;
3747 3750 struct iovec iov;
3748 3751 struct vattr va;
3749 3752 struct uio uio;
3750 3753 char *data;
3751 3754 struct sockaddr *ca;
3752 3755 char *name = NULL;
3753 3756 int is_referral;
3754 3757
3755 3758 DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3756 3759
3757 3760 /* CURRENT_FH: directory */
3758 3761 vp = cs->vp;
3759 3762 if (vp == NULL) {
3760 3763 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3761 3764 goto out;
3762 3765 }
3763 3766
3764 3767 if (cs->access == CS_ACCESS_DENIED) {
3765 3768 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3766 3769 goto out;
3767 3770 }
3768 3771
3769 3772 /* Is it a referral? */
3770 3773 if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3771 3774
3772 3775 is_referral = 1;
3773 3776
3774 3777 } else {
3775 3778
3776 3779 is_referral = 0;
3777 3780
3778 3781 if (vp->v_type == VDIR) {
3779 3782 *cs->statusp = resp->status = NFS4ERR_ISDIR;
3780 3783 goto out;
3781 3784 }
3782 3785
3783 3786 if (vp->v_type != VLNK) {
3784 3787 *cs->statusp = resp->status = NFS4ERR_INVAL;
3785 3788 goto out;
3786 3789 }
3787 3790
3788 3791 }
3789 3792
3790 3793 va.va_mask = AT_MODE;
3791 3794 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3792 3795 if (error) {
3793 3796 *cs->statusp = resp->status = puterrno4(error);
3794 3797 goto out;
3795 3798 }
3796 3799
3797 3800 if (MANDLOCK(vp, va.va_mode)) {
3798 3801 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3799 3802 goto out;
3800 3803 }
3801 3804
3802 3805 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3803 3806
3804 3807 if (is_referral) {
3805 3808 char *s;
3806 3809 size_t strsz;
3807 3810
3808 3811 /* Get an artificial symlink based on a referral */
3809 3812 s = build_symlink(vp, cs->cr, &strsz);
3810 3813 global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3811 3814 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3812 3815 vnode_t *, vp, char *, s);
3813 3816 if (s == NULL)
3814 3817 error = EINVAL;
3815 3818 else {
3816 3819 error = 0;
3817 3820 (void) strlcpy(data, s, MAXPATHLEN + 1);
3818 3821 kmem_free(s, strsz);
3819 3822 }
3820 3823
3821 3824 } else {
3822 3825
3823 3826 iov.iov_base = data;
3824 3827 iov.iov_len = MAXPATHLEN;
3825 3828 uio.uio_iov = &iov;
3826 3829 uio.uio_iovcnt = 1;
3827 3830 uio.uio_segflg = UIO_SYSSPACE;
3828 3831 uio.uio_extflg = UIO_COPY_CACHED;
3829 3832 uio.uio_loffset = 0;
3830 3833 uio.uio_resid = MAXPATHLEN;
3831 3834
3832 3835 error = VOP_READLINK(vp, &uio, cs->cr, NULL);
3833 3836
3834 3837 if (!error)
3835 3838 *(data + MAXPATHLEN - uio.uio_resid) = '\0';
3836 3839 }
3837 3840
3838 3841 if (error) {
3839 3842 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3840 3843 *cs->statusp = resp->status = puterrno4(error);
3841 3844 goto out;
3842 3845 }
3843 3846
3844 3847 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3845 3848 name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3846 3849 MAXPATHLEN + 1);
3847 3850
3848 3851 if (name == NULL) {
3849 3852 /*
3850 3853 * Even though the conversion failed, we return
3851 3854 * something. We just don't translate it.
3852 3855 */
3853 3856 name = data;
3854 3857 }
3855 3858
3856 3859 /*
3857 3860 * treat link name as data
3858 3861 */
3859 3862 (void) str_to_utf8(name, (utf8string *)&resp->link);
3860 3863
3861 3864 if (name != data)
3862 3865 kmem_free(name, MAXPATHLEN + 1);
3863 3866 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3864 3867 *cs->statusp = resp->status = NFS4_OK;
3865 3868
3866 3869 out:
3867 3870 DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3868 3871 READLINK4res *, resp);
3869 3872 }
3870 3873
3871 3874 static void
3872 3875 rfs4_op_readlink_free(nfs_resop4 *resop)
3873 3876 {
3874 3877 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3875 3878 utf8string *symlink = (utf8string *)&resp->link;
3876 3879
3877 3880 if (symlink->utf8string_val) {
3878 3881 UTF8STRING_FREE(*symlink)
3879 3882 }
3880 3883 }
3881 3884
3882 3885 /*
3883 3886 * release_lockowner:
3884 3887 * Release any state associated with the supplied
3885 3888 * lockowner. Note if any lo_state is holding locks we will not
3886 3889 * rele that lo_state and thus the lockowner will not be destroyed.
3887 3890 * A client using lock after the lock owner stateid has been released
3888 3891 * will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3889 3892 * to reissue the lock with new_lock_owner set to TRUE.
3890 3893 * args: lock_owner
3891 3894 * res: status
3892 3895 */
3893 3896 /* ARGSUSED */
3894 3897 static void
3895 3898 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3896 3899 struct svc_req *req, struct compound_state *cs)
3897 3900 {
3898 3901 RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3899 3902 RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3900 3903 rfs4_lockowner_t *lo;
3901 3904 rfs4_openowner_t *oo;
3902 3905 rfs4_state_t *sp;
3903 3906 rfs4_lo_state_t *lsp;
3904 3907 rfs4_client_t *cp;
3905 3908 bool_t create = FALSE;
3906 3909 locklist_t *llist;
3907 3910 sysid_t sysid;
3908 3911
3909 3912 DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3910 3913 cs, RELEASE_LOCKOWNER4args *, ap);
3911 3914
3912 3915 /* Make sure there is a clientid around for this request */
3913 3916 cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3914 3917
3915 3918 if (cp == NULL) {
3916 3919 *cs->statusp = resp->status =
3917 3920 rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3918 3921 goto out;
3919 3922 }
3920 3923 rfs4_client_rele(cp);
3921 3924
3922 3925 lo = rfs4_findlockowner(&ap->lock_owner, &create);
3923 3926 if (lo == NULL) {
3924 3927 *cs->statusp = resp->status = NFS4_OK;
3925 3928 goto out;
3926 3929 }
3927 3930 ASSERT(lo->rl_client != NULL);
3928 3931
3929 3932 /*
3930 3933 * Check for EXPIRED client. If so will reap state with in a lease
3931 3934 * period or on next set_clientid_confirm step
3932 3935 */
3933 3936 if (rfs4_lease_expired(lo->rl_client)) {
3934 3937 rfs4_lockowner_rele(lo);
3935 3938 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
3936 3939 goto out;
3937 3940 }
3938 3941
3939 3942 /*
3940 3943 * If no sysid has been assigned, then no locks exist; just return.
3941 3944 */
3942 3945 rfs4_dbe_lock(lo->rl_client->rc_dbe);
3943 3946 if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
3944 3947 rfs4_lockowner_rele(lo);
3945 3948 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3946 3949 goto out;
3947 3950 }
3948 3951
3949 3952 sysid = lo->rl_client->rc_sysidt;
3950 3953 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3951 3954
3952 3955 /*
3953 3956 * Mark the lockowner invalid.
3954 3957 */
3955 3958 rfs4_dbe_hide(lo->rl_dbe);
3956 3959
3957 3960 /*
3958 3961 * sysid-pid pair should now not be used since the lockowner is
3959 3962 * invalid. If the client were to instantiate the lockowner again
3960 3963 * it would be assigned a new pid. Thus we can get the list of
3961 3964 * current locks.
3962 3965 */
3963 3966
3964 3967 llist = flk_get_active_locks(sysid, lo->rl_pid);
3965 3968 /* If we are still holding locks fail */
3966 3969 if (llist != NULL) {
3967 3970
3968 3971 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3969 3972
3970 3973 flk_free_locklist(llist);
3971 3974 /*
3972 3975 * We need to unhide the lockowner so the client can
3973 3976 * try it again. The bad thing here is if the client
3974 3977 * has a logic error that took it here in the first place
3975 3978 * they probably have lost accounting of the locks that it
3976 3979 * is holding. So we may have dangling state until the
3977 3980 * open owner state is reaped via close. One scenario
3978 3981 * that could possibly occur is that the client has
3979 3982 * sent the unlock request(s) in separate threads
3980 3983 * and has not waited for the replies before sending the
3981 3984 * RELEASE_LOCKOWNER request. Presumably, it would expect
3982 3985 * and deal appropriately with NFS4ERR_LOCKS_HELD, by
3983 3986 * reissuing the request.
3984 3987 */
3985 3988 rfs4_dbe_unhide(lo->rl_dbe);
3986 3989 rfs4_lockowner_rele(lo);
3987 3990 goto out;
3988 3991 }
3989 3992
3990 3993 /*
3991 3994 * For the corresponding client we need to check each open
3992 3995 * owner for any opens that have lockowner state associated
3993 3996 * with this lockowner.
3994 3997 */
3995 3998
3996 3999 rfs4_dbe_lock(lo->rl_client->rc_dbe);
3997 4000 for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
3998 4001 oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
3999 4002
4000 4003 rfs4_dbe_lock(oo->ro_dbe);
4001 4004 for (sp = list_head(&oo->ro_statelist); sp != NULL;
4002 4005 sp = list_next(&oo->ro_statelist, sp)) {
4003 4006
4004 4007 rfs4_dbe_lock(sp->rs_dbe);
4005 4008 for (lsp = list_head(&sp->rs_lostatelist);
4006 4009 lsp != NULL;
4007 4010 lsp = list_next(&sp->rs_lostatelist, lsp)) {
4008 4011 if (lsp->rls_locker == lo) {
4009 4012 rfs4_dbe_lock(lsp->rls_dbe);
4010 4013 rfs4_dbe_invalidate(lsp->rls_dbe);
4011 4014 rfs4_dbe_unlock(lsp->rls_dbe);
4012 4015 }
4013 4016 }
4014 4017 rfs4_dbe_unlock(sp->rs_dbe);
4015 4018 }
4016 4019 rfs4_dbe_unlock(oo->ro_dbe);
4017 4020 }
4018 4021 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4019 4022
4020 4023 rfs4_lockowner_rele(lo);
4021 4024
4022 4025 *cs->statusp = resp->status = NFS4_OK;
4023 4026
4024 4027 out:
4025 4028 DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4026 4029 cs, RELEASE_LOCKOWNER4res *, resp);
4027 4030 }
4028 4031
4029 4032 /*
4030 4033 * short utility function to lookup a file and recall the delegation
4031 4034 */
4032 4035 static rfs4_file_t *
4033 4036 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4034 4037 int *lkup_error, cred_t *cr)
4035 4038 {
4036 4039 vnode_t *vp;
4037 4040 rfs4_file_t *fp = NULL;
4038 4041 bool_t fcreate = FALSE;
4039 4042 int error;
4040 4043
4041 4044 if (vpp)
4042 4045 *vpp = NULL;
4043 4046
4044 4047 if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4045 4048 NULL)) == 0) {
4046 4049 if (vp->v_type == VREG)
4047 4050 fp = rfs4_findfile(vp, NULL, &fcreate);
4048 4051 if (vpp)
4049 4052 *vpp = vp;
4050 4053 else
4051 4054 VN_RELE(vp);
4052 4055 }
4053 4056
4054 4057 if (lkup_error)
4055 4058 *lkup_error = error;
4056 4059
4057 4060 return (fp);
4058 4061 }
4059 4062
4060 4063 /*
4061 4064 * remove: args: CURRENT_FH: directory; name.
4062 4065 * res: status. If success - CURRENT_FH unchanged, return change_info
4063 4066 * for directory.
4064 4067 */
4065 4068 /* ARGSUSED */
4066 4069 static void
4067 4070 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4068 4071 struct compound_state *cs)
4069 4072 {
4070 4073 REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4071 4074 REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4072 4075 int error;
4073 4076 vnode_t *dvp, *vp;
4074 4077 struct vattr bdva, idva, adva;
4075 4078 char *nm;
4076 4079 uint_t len;
4077 4080 rfs4_file_t *fp;
4078 4081 int in_crit = 0;
4079 4082 bslabel_t *clabel;
4080 4083 struct sockaddr *ca;
4081 4084 char *name = NULL;
4082 4085 nfsstat4 status;
4083 4086
4084 4087 DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4085 4088 REMOVE4args *, args);
4086 4089
4087 4090 /* CURRENT_FH: directory */
4088 4091 dvp = cs->vp;
4089 4092 if (dvp == NULL) {
4090 4093 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4091 4094 goto out;
4092 4095 }
4093 4096
4094 4097 if (cs->access == CS_ACCESS_DENIED) {
4095 4098 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4096 4099 goto out;
4097 4100 }
4098 4101
4099 4102 /*
4100 4103 * If there is an unshared filesystem mounted on this vnode,
4101 4104 * Do not allow to remove anything in this directory.
4102 4105 */
4103 4106 if (vn_ismntpt(dvp)) {
4104 4107 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4105 4108 goto out;
4106 4109 }
4107 4110
4108 4111 if (dvp->v_type != VDIR) {
4109 4112 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4110 4113 goto out;
4111 4114 }
4112 4115
4113 4116 status = utf8_dir_verify(&args->target);
4114 4117 if (status != NFS4_OK) {
4115 4118 *cs->statusp = resp->status = status;
4116 4119 goto out;
4117 4120 }
4118 4121
4119 4122 /*
4120 4123 * Lookup the file so that we can check if it's a directory
4121 4124 */
4122 4125 nm = utf8_to_fn(&args->target, &len, NULL);
4123 4126 if (nm == NULL) {
4124 4127 *cs->statusp = resp->status = NFS4ERR_INVAL;
4125 4128 goto out;
4126 4129 }
4127 4130
4128 4131 if (len > MAXNAMELEN) {
4129 4132 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4130 4133 kmem_free(nm, len);
4131 4134 goto out;
4132 4135 }
4133 4136
4134 4137 if (rdonly4(req, cs)) {
4135 4138 *cs->statusp = resp->status = NFS4ERR_ROFS;
4136 4139 kmem_free(nm, len);
4137 4140 goto out;
4138 4141 }
4139 4142
4140 4143 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4141 4144 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4142 4145 MAXPATHLEN + 1);
4143 4146
4144 4147 if (name == NULL) {
4145 4148 *cs->statusp = resp->status = NFS4ERR_INVAL;
4146 4149 kmem_free(nm, len);
4147 4150 goto out;
4148 4151 }
4149 4152
4150 4153 /*
4151 4154 * Lookup the file to determine type and while we are see if
4152 4155 * there is a file struct around and check for delegation.
4153 4156 * We don't need to acquire va_seq before this lookup, if
4154 4157 * it causes an update, cinfo.before will not match, which will
4155 4158 * trigger a cache flush even if atomic is TRUE.
4156 4159 */
4157 4160 if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4158 4161 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4159 4162 NULL)) {
4160 4163 VN_RELE(vp);
4161 4164 rfs4_file_rele(fp);
4162 4165 *cs->statusp = resp->status = NFS4ERR_DELAY;
4163 4166 if (nm != name)
4164 4167 kmem_free(name, MAXPATHLEN + 1);
4165 4168 kmem_free(nm, len);
4166 4169 goto out;
4167 4170 }
4168 4171 }
4169 4172
4170 4173 /* Didn't find anything to remove */
4171 4174 if (vp == NULL) {
4172 4175 *cs->statusp = resp->status = error;
4173 4176 if (nm != name)
4174 4177 kmem_free(name, MAXPATHLEN + 1);
4175 4178 kmem_free(nm, len);
4176 4179 goto out;
4177 4180 }
4178 4181
4179 4182 if (nbl_need_check(vp)) {
4180 4183 nbl_start_crit(vp, RW_READER);
4181 4184 in_crit = 1;
4182 4185 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4183 4186 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4184 4187 if (nm != name)
4185 4188 kmem_free(name, MAXPATHLEN + 1);
4186 4189 kmem_free(nm, len);
4187 4190 nbl_end_crit(vp);
4188 4191 VN_RELE(vp);
4189 4192 if (fp) {
4190 4193 rfs4_clear_dont_grant(fp);
4191 4194 rfs4_file_rele(fp);
4192 4195 }
4193 4196 goto out;
4194 4197 }
4195 4198 }
4196 4199
4197 4200 /* check label before allowing removal */
4198 4201 if (is_system_labeled()) {
4199 4202 ASSERT(req->rq_label != NULL);
4200 4203 clabel = req->rq_label;
4201 4204 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4202 4205 "got client label from request(1)",
4203 4206 struct svc_req *, req);
4204 4207 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4205 4208 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4206 4209 cs->exi)) {
4207 4210 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4208 4211 if (name != nm)
4209 4212 kmem_free(name, MAXPATHLEN + 1);
4210 4213 kmem_free(nm, len);
4211 4214 if (in_crit)
4212 4215 nbl_end_crit(vp);
4213 4216 VN_RELE(vp);
4214 4217 if (fp) {
4215 4218 rfs4_clear_dont_grant(fp);
4216 4219 rfs4_file_rele(fp);
4217 4220 }
4218 4221 goto out;
4219 4222 }
4220 4223 }
4221 4224 }
4222 4225
4223 4226 /* Get dir "before" change value */
4224 4227 bdva.va_mask = AT_CTIME|AT_SEQ;
4225 4228 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4226 4229 if (error) {
4227 4230 *cs->statusp = resp->status = puterrno4(error);
4228 4231 if (nm != name)
4229 4232 kmem_free(name, MAXPATHLEN + 1);
4230 4233 kmem_free(nm, len);
4231 4234 if (in_crit)
4232 4235 nbl_end_crit(vp);
4233 4236 VN_RELE(vp);
4234 4237 if (fp) {
4235 4238 rfs4_clear_dont_grant(fp);
4236 4239 rfs4_file_rele(fp);
4237 4240 }
4238 4241 goto out;
4239 4242 }
4240 4243 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4241 4244
4242 4245 /* Actually do the REMOVE operation */
4243 4246 if (vp->v_type == VDIR) {
4244 4247 /*
4245 4248 * Can't remove a directory that has a mounted-on filesystem.
4246 4249 */
4247 4250 if (vn_ismntpt(vp)) {
4248 4251 error = EACCES;
4249 4252 } else {
4250 4253 /*
4251 4254 * System V defines rmdir to return EEXIST,
4252 4255 * not ENOTEMPTY, if the directory is not
4253 4256 * empty. A System V NFS server needs to map
4254 4257 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4255 4258 * transmit over the wire.
4256 4259 */
4257 4260 if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr,
4258 4261 NULL, 0)) == EEXIST)
4259 4262 error = ENOTEMPTY;
4260 4263 }
4261 4264 } else {
4262 4265 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4263 4266 fp != NULL) {
4264 4267 struct vattr va;
4265 4268 vnode_t *tvp;
4266 4269
4267 4270 rfs4_dbe_lock(fp->rf_dbe);
4268 4271 tvp = fp->rf_vp;
4269 4272 if (tvp)
4270 4273 VN_HOLD(tvp);
4271 4274 rfs4_dbe_unlock(fp->rf_dbe);
4272 4275
4273 4276 if (tvp) {
4274 4277 /*
4275 4278 * This is va_seq safe because we are not
4276 4279 * manipulating dvp.
4277 4280 */
4278 4281 va.va_mask = AT_NLINK;
4279 4282 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4280 4283 va.va_nlink == 0) {
4281 4284 /* Remove state on file remove */
4282 4285 if (in_crit) {
4283 4286 nbl_end_crit(vp);
4284 4287 in_crit = 0;
4285 4288 }
4286 4289 rfs4_close_all_state(fp);
4287 4290 }
4288 4291 VN_RELE(tvp);
4289 4292 }
4290 4293 }
4291 4294 }
4292 4295
4293 4296 if (in_crit)
4294 4297 nbl_end_crit(vp);
4295 4298 VN_RELE(vp);
4296 4299
4297 4300 if (fp) {
4298 4301 rfs4_clear_dont_grant(fp);
4299 4302 rfs4_file_rele(fp);
4300 4303 }
4301 4304 if (nm != name)
4302 4305 kmem_free(name, MAXPATHLEN + 1);
4303 4306 kmem_free(nm, len);
4304 4307
4305 4308 if (error) {
4306 4309 *cs->statusp = resp->status = puterrno4(error);
4307 4310 goto out;
4308 4311 }
4309 4312
4310 4313 /*
4311 4314 * Get the initial "after" sequence number, if it fails, set to zero
4312 4315 */
4313 4316 idva.va_mask = AT_SEQ;
4314 4317 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4315 4318 idva.va_seq = 0;
4316 4319
4317 4320 /*
4318 4321 * Force modified data and metadata out to stable storage.
4319 4322 */
4320 4323 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4321 4324
4322 4325 /*
4323 4326 * Get "after" change value, if it fails, simply return the
4324 4327 * before value.
4325 4328 */
4326 4329 adva.va_mask = AT_CTIME|AT_SEQ;
4327 4330 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4328 4331 adva.va_ctime = bdva.va_ctime;
4329 4332 adva.va_seq = 0;
4330 4333 }
4331 4334
4332 4335 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4333 4336
4334 4337 /*
4335 4338 * The cinfo.atomic = TRUE only if we have
4336 4339 * non-zero va_seq's, and it has incremented by exactly one
4337 4340 * during the VOP_REMOVE/RMDIR and it didn't change during
4338 4341 * the VOP_FSYNC.
4339 4342 */
4340 4343 if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4341 4344 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4342 4345 resp->cinfo.atomic = TRUE;
4343 4346 else
4344 4347 resp->cinfo.atomic = FALSE;
4345 4348
4346 4349 *cs->statusp = resp->status = NFS4_OK;
4347 4350
4348 4351 out:
4349 4352 DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4350 4353 REMOVE4res *, resp);
4351 4354 }
4352 4355
4353 4356 /*
4354 4357 * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4355 4358 * oldname and newname.
4356 4359 * res: status. If success - CURRENT_FH unchanged, return change_info
4357 4360 * for both from and target directories.
4358 4361 */
4359 4362 /* ARGSUSED */
4360 4363 static void
4361 4364 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4362 4365 struct compound_state *cs)
4363 4366 {
4364 4367 RENAME4args *args = &argop->nfs_argop4_u.oprename;
4365 4368 RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4366 4369 int error;
4367 4370 vnode_t *odvp;
4368 4371 vnode_t *ndvp;
4369 4372 vnode_t *srcvp, *targvp, *tvp;
4370 4373 struct vattr obdva, oidva, oadva;
4371 4374 struct vattr nbdva, nidva, nadva;
4372 4375 char *onm, *nnm;
4373 4376 uint_t olen, nlen;
4374 4377 rfs4_file_t *fp, *sfp;
4375 4378 int in_crit_src, in_crit_targ;
4376 4379 int fp_rele_grant_hold, sfp_rele_grant_hold;
4377 4380 int unlinked;
4378 4381 bslabel_t *clabel;
4379 4382 struct sockaddr *ca;
4380 4383 char *converted_onm = NULL;
4381 4384 char *converted_nnm = NULL;
4382 4385 nfsstat4 status;
4383 4386
4384 4387 DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4385 4388 RENAME4args *, args);
4386 4389
4387 4390 fp = sfp = NULL;
4388 4391 srcvp = targvp = tvp = NULL;
4389 4392 in_crit_src = in_crit_targ = 0;
4390 4393 fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4391 4394 unlinked = 0;
4392 4395
4393 4396 /* CURRENT_FH: target directory */
4394 4397 ndvp = cs->vp;
4395 4398 if (ndvp == NULL) {
4396 4399 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4397 4400 goto out;
4398 4401 }
4399 4402
4400 4403 /* SAVED_FH: from directory */
4401 4404 odvp = cs->saved_vp;
4402 4405 if (odvp == NULL) {
4403 4406 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4404 4407 goto out;
4405 4408 }
4406 4409
4407 4410 if (cs->access == CS_ACCESS_DENIED) {
4408 4411 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4409 4412 goto out;
4410 4413 }
4411 4414
4412 4415 /*
4413 4416 * If there is an unshared filesystem mounted on this vnode,
4414 4417 * do not allow to rename objects in this directory.
4415 4418 */
4416 4419 if (vn_ismntpt(odvp)) {
4417 4420 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4418 4421 goto out;
4419 4422 }
4420 4423
4421 4424 /*
4422 4425 * If there is an unshared filesystem mounted on this vnode,
4423 4426 * do not allow to rename to this directory.
4424 4427 */
4425 4428 if (vn_ismntpt(ndvp)) {
4426 4429 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4427 4430 goto out;
4428 4431 }
4429 4432
4430 4433 if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4431 4434 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4432 4435 goto out;
4433 4436 }
4434 4437
4435 4438 if (cs->saved_exi != cs->exi) {
4436 4439 *cs->statusp = resp->status = NFS4ERR_XDEV;
4437 4440 goto out;
4438 4441 }
4439 4442
4440 4443 status = utf8_dir_verify(&args->oldname);
4441 4444 if (status != NFS4_OK) {
4442 4445 *cs->statusp = resp->status = status;
4443 4446 goto out;
4444 4447 }
4445 4448
4446 4449 status = utf8_dir_verify(&args->newname);
4447 4450 if (status != NFS4_OK) {
4448 4451 *cs->statusp = resp->status = status;
4449 4452 goto out;
4450 4453 }
4451 4454
4452 4455 onm = utf8_to_fn(&args->oldname, &olen, NULL);
4453 4456 if (onm == NULL) {
4454 4457 *cs->statusp = resp->status = NFS4ERR_INVAL;
4455 4458 goto out;
4456 4459 }
4457 4460 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4458 4461 nlen = MAXPATHLEN + 1;
4459 4462 converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4460 4463 nlen);
4461 4464
4462 4465 if (converted_onm == NULL) {
4463 4466 *cs->statusp = resp->status = NFS4ERR_INVAL;
4464 4467 kmem_free(onm, olen);
4465 4468 goto out;
4466 4469 }
4467 4470
4468 4471 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4469 4472 if (nnm == NULL) {
4470 4473 *cs->statusp = resp->status = NFS4ERR_INVAL;
4471 4474 if (onm != converted_onm)
4472 4475 kmem_free(converted_onm, MAXPATHLEN + 1);
4473 4476 kmem_free(onm, olen);
4474 4477 goto out;
4475 4478 }
4476 4479 converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4477 4480 MAXPATHLEN + 1);
4478 4481
4479 4482 if (converted_nnm == NULL) {
4480 4483 *cs->statusp = resp->status = NFS4ERR_INVAL;
4481 4484 kmem_free(nnm, nlen);
4482 4485 nnm = NULL;
4483 4486 if (onm != converted_onm)
4484 4487 kmem_free(converted_onm, MAXPATHLEN + 1);
4485 4488 kmem_free(onm, olen);
4486 4489 goto out;
4487 4490 }
4488 4491
4489 4492
4490 4493 if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4491 4494 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4492 4495 kmem_free(onm, olen);
4493 4496 kmem_free(nnm, nlen);
4494 4497 goto out;
4495 4498 }
4496 4499
4497 4500
4498 4501 if (rdonly4(req, cs)) {
4499 4502 *cs->statusp = resp->status = NFS4ERR_ROFS;
4500 4503 if (onm != converted_onm)
4501 4504 kmem_free(converted_onm, MAXPATHLEN + 1);
4502 4505 kmem_free(onm, olen);
4503 4506 if (nnm != converted_nnm)
4504 4507 kmem_free(converted_nnm, MAXPATHLEN + 1);
4505 4508 kmem_free(nnm, nlen);
4506 4509 goto out;
4507 4510 }
4508 4511
4509 4512 /* check label of the target dir */
4510 4513 if (is_system_labeled()) {
4511 4514 ASSERT(req->rq_label != NULL);
4512 4515 clabel = req->rq_label;
4513 4516 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4514 4517 "got client label from request(1)",
4515 4518 struct svc_req *, req);
4516 4519 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4517 4520 if (!do_rfs_label_check(clabel, ndvp,
4518 4521 EQUALITY_CHECK, cs->exi)) {
4519 4522 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4520 4523 goto err_out;
4521 4524 }
4522 4525 }
4523 4526 }
4524 4527
4525 4528 /*
4526 4529 * Is the source a file and have a delegation?
4527 4530 * We don't need to acquire va_seq before these lookups, if
4528 4531 * it causes an update, cinfo.before will not match, which will
4529 4532 * trigger a cache flush even if atomic is TRUE.
4530 4533 */
4531 4534 if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4532 4535 &error, cs->cr)) {
4533 4536 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4534 4537 NULL)) {
4535 4538 *cs->statusp = resp->status = NFS4ERR_DELAY;
4536 4539 goto err_out;
4537 4540 }
4538 4541 }
4539 4542
4540 4543 if (srcvp == NULL) {
4541 4544 *cs->statusp = resp->status = puterrno4(error);
4542 4545 if (onm != converted_onm)
4543 4546 kmem_free(converted_onm, MAXPATHLEN + 1);
4544 4547 kmem_free(onm, olen);
4545 4548 if (nnm != converted_nnm)
4546 4549 kmem_free(converted_nnm, MAXPATHLEN + 1);
4547 4550 kmem_free(nnm, nlen);
4548 4551 goto out;
4549 4552 }
4550 4553
4551 4554 sfp_rele_grant_hold = 1;
4552 4555
4553 4556 /* Does the destination exist and a file and have a delegation? */
4554 4557 if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4555 4558 NULL, cs->cr)) {
4556 4559 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4557 4560 NULL)) {
4558 4561 *cs->statusp = resp->status = NFS4ERR_DELAY;
4559 4562 goto err_out;
4560 4563 }
4561 4564 }
4562 4565 fp_rele_grant_hold = 1;
4563 4566
4564 4567 /* Check for NBMAND lock on both source and target */
4565 4568 if (nbl_need_check(srcvp)) {
4566 4569 nbl_start_crit(srcvp, RW_READER);
4567 4570 in_crit_src = 1;
4568 4571 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4569 4572 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4570 4573 goto err_out;
4571 4574 }
4572 4575 }
4573 4576
4574 4577 if (targvp && nbl_need_check(targvp)) {
4575 4578 nbl_start_crit(targvp, RW_READER);
4576 4579 in_crit_targ = 1;
4577 4580 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4578 4581 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4579 4582 goto err_out;
4580 4583 }
4581 4584 }
4582 4585
4583 4586 /* Get source "before" change value */
4584 4587 obdva.va_mask = AT_CTIME|AT_SEQ;
4585 4588 error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4586 4589 if (!error) {
4587 4590 nbdva.va_mask = AT_CTIME|AT_SEQ;
4588 4591 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4589 4592 }
4590 4593 if (error) {
4591 4594 *cs->statusp = resp->status = puterrno4(error);
4592 4595 goto err_out;
4593 4596 }
4594 4597
4595 4598 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4596 4599 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4597 4600
4598 4601 error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr,
4599 4602 NULL, 0);
4600 4603
4601 4604 /*
4602 4605 * If target existed and was unlinked by VOP_RENAME, state will need
4603 4606 * closed. To avoid deadlock, rfs4_close_all_state will be done after
4604 4607 * any necessary nbl_end_crit on srcvp and tgtvp.
4605 4608 */
4606 4609 if (error == 0 && fp != NULL) {
4607 4610 rfs4_dbe_lock(fp->rf_dbe);
4608 4611 tvp = fp->rf_vp;
4609 4612 if (tvp)
4610 4613 VN_HOLD(tvp);
4611 4614 rfs4_dbe_unlock(fp->rf_dbe);
4612 4615
4613 4616 if (tvp) {
4614 4617 struct vattr va;
4615 4618 va.va_mask = AT_NLINK;
4616 4619
4617 4620 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4618 4621 va.va_nlink == 0) {
4619 4622 unlinked = 1;
4620 4623
4621 4624 /* DEBUG data */
4622 4625 if ((srcvp == targvp) || (tvp != targvp)) {
4623 4626 cmn_err(CE_WARN, "rfs4_op_rename: "
4624 4627 "srcvp %p, targvp: %p, tvp: %p",
4625 4628 (void *)srcvp, (void *)targvp,
4626 4629 (void *)tvp);
4627 4630 }
4628 4631 } else {
4629 4632 VN_RELE(tvp);
4630 4633 }
4631 4634 }
4632 4635 }
4633 4636 if (error == 0)
4634 4637 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4635 4638
4636 4639 if (in_crit_src)
4637 4640 nbl_end_crit(srcvp);
4638 4641 if (srcvp)
4639 4642 VN_RELE(srcvp);
4640 4643 if (in_crit_targ)
4641 4644 nbl_end_crit(targvp);
4642 4645 if (targvp)
4643 4646 VN_RELE(targvp);
4644 4647
4645 4648 if (unlinked) {
4646 4649 ASSERT(fp != NULL);
4647 4650 ASSERT(tvp != NULL);
4648 4651
4649 4652 /* DEBUG data */
4650 4653 if (RW_READ_HELD(&tvp->v_nbllock)) {
4651 4654 cmn_err(CE_WARN, "rfs4_op_rename: "
4652 4655 "RW_READ_HELD(%p)", (void *)tvp);
4653 4656 }
4654 4657
4655 4658 /* The file is gone and so should the state */
4656 4659 rfs4_close_all_state(fp);
4657 4660 VN_RELE(tvp);
4658 4661 }
4659 4662
4660 4663 if (sfp) {
4661 4664 rfs4_clear_dont_grant(sfp);
4662 4665 rfs4_file_rele(sfp);
4663 4666 }
4664 4667 if (fp) {
4665 4668 rfs4_clear_dont_grant(fp);
4666 4669 rfs4_file_rele(fp);
4667 4670 }
4668 4671
4669 4672 if (converted_onm != onm)
4670 4673 kmem_free(converted_onm, MAXPATHLEN + 1);
4671 4674 kmem_free(onm, olen);
4672 4675 if (converted_nnm != nnm)
4673 4676 kmem_free(converted_nnm, MAXPATHLEN + 1);
4674 4677 kmem_free(nnm, nlen);
4675 4678
4676 4679 /*
4677 4680 * Get the initial "after" sequence number, if it fails, set to zero
4678 4681 */
4679 4682 oidva.va_mask = AT_SEQ;
4680 4683 if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4681 4684 oidva.va_seq = 0;
4682 4685
4683 4686 nidva.va_mask = AT_SEQ;
4684 4687 if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4685 4688 nidva.va_seq = 0;
4686 4689
4687 4690 /*
4688 4691 * Force modified data and metadata out to stable storage.
4689 4692 */
4690 4693 (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4691 4694 (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4692 4695
4693 4696 if (error) {
4694 4697 *cs->statusp = resp->status = puterrno4(error);
4695 4698 goto out;
4696 4699 }
4697 4700
4698 4701 /*
4699 4702 * Get "after" change values, if it fails, simply return the
4700 4703 * before value.
4701 4704 */
4702 4705 oadva.va_mask = AT_CTIME|AT_SEQ;
4703 4706 if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4704 4707 oadva.va_ctime = obdva.va_ctime;
4705 4708 oadva.va_seq = 0;
4706 4709 }
4707 4710
4708 4711 nadva.va_mask = AT_CTIME|AT_SEQ;
4709 4712 if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4710 4713 nadva.va_ctime = nbdva.va_ctime;
4711 4714 nadva.va_seq = 0;
4712 4715 }
4713 4716
4714 4717 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4715 4718 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4716 4719
4717 4720 /*
4718 4721 * The cinfo.atomic = TRUE only if we have
4719 4722 * non-zero va_seq's, and it has incremented by exactly one
4720 4723 * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4721 4724 */
4722 4725 if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4723 4726 oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4724 4727 resp->source_cinfo.atomic = TRUE;
4725 4728 else
4726 4729 resp->source_cinfo.atomic = FALSE;
4727 4730
4728 4731 if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4729 4732 nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4730 4733 resp->target_cinfo.atomic = TRUE;
4731 4734 else
4732 4735 resp->target_cinfo.atomic = FALSE;
4733 4736
4734 4737 #ifdef VOLATILE_FH_TEST
4735 4738 {
4736 4739 extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4737 4740
4738 4741 /*
4739 4742 * Add the renamed file handle to the volatile rename list
4740 4743 */
4741 4744 if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4742 4745 /* file handles may expire on rename */
4743 4746 vnode_t *vp;
4744 4747
4745 4748 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4746 4749 /*
4747 4750 * Already know that nnm will be a valid string
4748 4751 */
4749 4752 error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4750 4753 NULL, NULL, NULL);
4751 4754 kmem_free(nnm, nlen);
4752 4755 if (!error) {
4753 4756 add_volrnm_fh(cs->exi, vp);
4754 4757 VN_RELE(vp);
4755 4758 }
4756 4759 }
4757 4760 }
4758 4761 #endif /* VOLATILE_FH_TEST */
4759 4762
4760 4763 *cs->statusp = resp->status = NFS4_OK;
4761 4764 out:
4762 4765 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4763 4766 RENAME4res *, resp);
4764 4767 return;
4765 4768
4766 4769 err_out:
4767 4770 if (onm != converted_onm)
4768 4771 kmem_free(converted_onm, MAXPATHLEN + 1);
4769 4772 if (onm != NULL)
4770 4773 kmem_free(onm, olen);
4771 4774 if (nnm != converted_nnm)
4772 4775 kmem_free(converted_nnm, MAXPATHLEN + 1);
4773 4776 if (nnm != NULL)
4774 4777 kmem_free(nnm, nlen);
4775 4778
4776 4779 if (in_crit_src) nbl_end_crit(srcvp);
4777 4780 if (in_crit_targ) nbl_end_crit(targvp);
4778 4781 if (targvp) VN_RELE(targvp);
4779 4782 if (srcvp) VN_RELE(srcvp);
4780 4783 if (sfp) {
4781 4784 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4782 4785 rfs4_file_rele(sfp);
4783 4786 }
4784 4787 if (fp) {
4785 4788 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4786 4789 rfs4_file_rele(fp);
4787 4790 }
4788 4791
4789 4792 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4790 4793 RENAME4res *, resp);
4791 4794 }
4792 4795
4793 4796 /* ARGSUSED */
4794 4797 static void
4795 4798 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4796 4799 struct compound_state *cs)
4797 4800 {
4798 4801 RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4799 4802 RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4800 4803 rfs4_client_t *cp;
4801 4804
4802 4805 DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4803 4806 RENEW4args *, args);
4804 4807
4805 4808 if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4806 4809 *cs->statusp = resp->status =
4807 4810 rfs4_check_clientid(&args->clientid, 0);
4808 4811 goto out;
4809 4812 }
4810 4813
4811 4814 if (rfs4_lease_expired(cp)) {
4812 4815 rfs4_client_rele(cp);
4813 4816 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4814 4817 goto out;
4815 4818 }
4816 4819
4817 4820 rfs4_update_lease(cp);
4818 4821
4819 4822 mutex_enter(cp->rc_cbinfo.cb_lock);
4820 4823 if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4821 4824 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4822 4825 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4823 4826 } else {
4824 4827 *cs->statusp = resp->status = NFS4_OK;
4825 4828 }
4826 4829 mutex_exit(cp->rc_cbinfo.cb_lock);
4827 4830
4828 4831 rfs4_client_rele(cp);
4829 4832
4830 4833 out:
4831 4834 DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4832 4835 RENEW4res *, resp);
4833 4836 }
4834 4837
4835 4838 /* ARGSUSED */
4836 4839 static void
4837 4840 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4838 4841 struct compound_state *cs)
4839 4842 {
4840 4843 RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4841 4844
4842 4845 DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4843 4846
4844 4847 /* No need to check cs->access - we are not accessing any object */
4845 4848 if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4846 4849 *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4847 4850 goto out;
4848 4851 }
4849 4852 if (cs->vp != NULL) {
4850 4853 VN_RELE(cs->vp);
4851 4854 }
4852 4855 cs->vp = cs->saved_vp;
4853 4856 cs->saved_vp = NULL;
4854 4857 cs->exi = cs->saved_exi;
4855 4858 nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4856 4859 *cs->statusp = resp->status = NFS4_OK;
4857 4860 cs->deleg = FALSE;
4858 4861
4859 4862 out:
4860 4863 DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4861 4864 RESTOREFH4res *, resp);
4862 4865 }
4863 4866
4864 4867 /* ARGSUSED */
4865 4868 static void
4866 4869 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4867 4870 struct compound_state *cs)
4868 4871 {
4869 4872 SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4870 4873
4871 4874 DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4872 4875
4873 4876 /* No need to check cs->access - we are not accessing any object */
4874 4877 if (cs->vp == NULL) {
4875 4878 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4876 4879 goto out;
4877 4880 }
4878 4881 if (cs->saved_vp != NULL) {
4879 4882 VN_RELE(cs->saved_vp);
4880 4883 }
4881 4884 cs->saved_vp = cs->vp;
4882 4885 VN_HOLD(cs->saved_vp);
4883 4886 cs->saved_exi = cs->exi;
4884 4887 /*
4885 4888 * since SAVEFH is fairly rare, don't alloc space for its fh
4886 4889 * unless necessary.
4887 4890 */
4888 4891 if (cs->saved_fh.nfs_fh4_val == NULL) {
4889 4892 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4890 4893 }
4891 4894 nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4892 4895 *cs->statusp = resp->status = NFS4_OK;
4893 4896
4894 4897 out:
4895 4898 DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4896 4899 SAVEFH4res *, resp);
4897 4900 }
4898 4901
4899 4902 /*
4900 4903 * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4901 4904 * return the bitmap of attrs that were set successfully. It is also
4902 4905 * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4903 4906 * always be called only after do_rfs4_set_attrs().
4904 4907 *
4905 4908 * Verify that the attributes are same as the expected ones. sargp->vap
4906 4909 * and sargp->sbp contain the input attributes as translated from fattr4.
4907 4910 *
4908 4911 * This function verifies only the attrs that correspond to a vattr or
4909 4912 * vfsstat struct. That is because of the extra step needed to get the
4910 4913 * corresponding system structs. Other attributes have already been set or
4911 4914 * verified by do_rfs4_set_attrs.
4912 4915 *
4913 4916 * Return 0 if all attrs match, -1 if some don't, error if error processing.
4914 4917 */
4915 4918 static int
4916 4919 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4917 4920 bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4918 4921 {
4919 4922 int error, ret_error = 0;
4920 4923 int i, k;
4921 4924 uint_t sva_mask = sargp->vap->va_mask;
4922 4925 uint_t vbit;
4923 4926 union nfs4_attr_u *na;
4924 4927 uint8_t *amap;
4925 4928 bool_t getsb = ntovp->vfsstat;
4926 4929
4927 4930 if (sva_mask != 0) {
4928 4931 /*
4929 4932 * Okay to overwrite sargp->vap because we verify based
4930 4933 * on the incoming values.
4931 4934 */
4932 4935 ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4933 4936 sargp->cs->cr, NULL);
4934 4937 if (ret_error) {
4935 4938 if (resp == NULL)
4936 4939 return (ret_error);
4937 4940 /*
4938 4941 * Must return bitmap of successful attrs
4939 4942 */
4940 4943 sva_mask = 0; /* to prevent checking vap later */
4941 4944 } else {
4942 4945 /*
4943 4946 * Some file systems clobber va_mask. It is probably
4944 4947 * wrong of them to do so; nonetheless, we practice
4945 4948 * defensive coding.
4946 4949 * See bug id 4276830.
4947 4950 */
4948 4951 sargp->vap->va_mask = sva_mask;
4949 4952 }
4950 4953 }
4951 4954
4952 4955 if (getsb) {
4953 4956 /*
4954 4957 * Now get the superblock and loop on the bitmap, as there is
4955 4958 * no simple way of translating from superblock to bitmap4.
4956 4959 */
4957 4960 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4958 4961 if (ret_error) {
4959 4962 if (resp == NULL)
4960 4963 goto errout;
4961 4964 getsb = FALSE;
4962 4965 }
4963 4966 }
4964 4967
4965 4968 /*
4966 4969 * Now loop and verify each attribute which getattr returned
4967 4970 * whether it's the same as the input.
4968 4971 */
4969 4972 if (resp == NULL && !getsb && (sva_mask == 0))
4970 4973 goto errout;
4971 4974
4972 4975 na = ntovp->na;
4973 4976 amap = ntovp->amap;
4974 4977 k = 0;
4975 4978 for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4976 4979 k = *amap;
4977 4980 ASSERT(nfs4_ntov_map[k].nval == k);
4978 4981 vbit = nfs4_ntov_map[k].vbit;
4979 4982
4980 4983 /*
4981 4984 * If vattr attribute but VOP_GETATTR failed, or it's
4982 4985 * superblock attribute but VFS_STATVFS failed, skip
4983 4986 */
4984 4987 if (vbit) {
4985 4988 if ((vbit & sva_mask) == 0)
4986 4989 continue;
4987 4990 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4988 4991 continue;
4989 4992 }
4990 4993 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
4991 4994 if (resp != NULL) {
4992 4995 if (error)
4993 4996 ret_error = -1; /* not all match */
4994 4997 else /* update response bitmap */
4995 4998 *resp |= nfs4_ntov_map[k].fbit;
4996 4999 continue;
4997 5000 }
4998 5001 if (error) {
4999 5002 ret_error = -1; /* not all match */
5000 5003 break;
5001 5004 }
5002 5005 }
5003 5006 errout:
5004 5007 return (ret_error);
5005 5008 }
5006 5009
5007 5010 /*
5008 5011 * Decode the attribute to be set/verified. If the attr requires a sys op
5009 5012 * (VOP_GETATTR, VFS_STATVFS), and the request is to verify, then don't
5010 5013 * call the sv_getit function for it, because the sys op hasn't yet been done.
5011 5014 * Return 0 for success, error code if failed.
5012 5015 *
5013 5016 * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
5014 5017 */
5015 5018 static int
5016 5019 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
5017 5020 int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
5018 5021 {
5019 5022 int error = 0;
5020 5023 bool_t set_later;
5021 5024
5022 5025 sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
5023 5026
5024 5027 if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
5025 5028 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
5026 5029 /*
5027 5030 * don't verify yet if a vattr or sb dependent attr,
5028 5031 * because we don't have their sys values yet.
5029 5032 * Will be done later.
5030 5033 */
5031 5034 if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
5032 5035 /*
5033 5036 * ACLs are a special case, since setting the MODE
5034 5037 * conflicts with setting the ACL. We delay setting
5035 5038 * the ACL until all other attributes have been set.
5036 5039 * The ACL gets set in do_rfs4_op_setattr().
5037 5040 */
5038 5041 if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5039 5042 error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5040 5043 sargp, nap);
5041 5044 if (error) {
5042 5045 xdr_free(nfs4_ntov_map[k].xfunc,
5043 5046 (caddr_t)nap);
5044 5047 }
5045 5048 }
5046 5049 }
5047 5050 } else {
5048 5051 #ifdef DEBUG
5049 5052 cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5050 5053 "decoding attribute %d\n", k);
5051 5054 #endif
5052 5055 error = EINVAL;
5053 5056 }
5054 5057 if (!error && resp_bval && !set_later) {
5055 5058 *resp_bval |= nfs4_ntov_map[k].fbit;
5056 5059 }
5057 5060
5058 5061 return (error);
5059 5062 }
5060 5063
5061 5064 /*
5062 5065 * Set vattr based on incoming fattr4 attrs - used by setattr.
5063 5066 * Set response mask. Ignore any values that are not writable vattr attrs.
5064 5067 */
5065 5068 static nfsstat4
5066 5069 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5067 5070 struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5068 5071 nfs4_attr_cmd_t cmd)
5069 5072 {
5070 5073 int error = 0;
5071 5074 int i;
5072 5075 char *attrs = fattrp->attrlist4;
5073 5076 uint32_t attrslen = fattrp->attrlist4_len;
5074 5077 XDR xdr;
5075 5078 nfsstat4 status = NFS4_OK;
5076 5079 vnode_t *vp = cs->vp;
5077 5080 union nfs4_attr_u *na;
5078 5081 uint8_t *amap;
5079 5082
5080 5083 #ifndef lint
5081 5084 /*
5082 5085 * Make sure that maximum attribute number can be expressed as an
5083 5086 * 8 bit quantity.
5084 5087 */
5085 5088 ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5086 5089 #endif
5087 5090
5088 5091 if (vp == NULL) {
5089 5092 if (resp)
5090 5093 *resp = 0;
5091 5094 return (NFS4ERR_NOFILEHANDLE);
5092 5095 }
5093 5096 if (cs->access == CS_ACCESS_DENIED) {
5094 5097 if (resp)
5095 5098 *resp = 0;
5096 5099 return (NFS4ERR_ACCESS);
5097 5100 }
5098 5101
5099 5102 sargp->op = cmd;
5100 5103 sargp->cs = cs;
5101 5104 sargp->flag = 0; /* may be set later */
5102 5105 sargp->vap->va_mask = 0;
5103 5106 sargp->rdattr_error = NFS4_OK;
5104 5107 sargp->rdattr_error_req = FALSE;
5105 5108 /* sargp->sbp is set by the caller */
5106 5109
5107 5110 xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5108 5111
5109 5112 na = ntovp->na;
5110 5113 amap = ntovp->amap;
5111 5114
5112 5115 /*
5113 5116 * The following loop iterates on the nfs4_ntov_map checking
5114 5117 * if the fbit is set in the requested bitmap.
5115 5118 * If set then we process the arguments using the
5116 5119 * rfs4_fattr4 conversion functions to populate the setattr
5117 5120 * vattr and va_mask. Any settable attrs that are not using vattr
5118 5121 * will be set in this loop.
5119 5122 */
5120 5123 for (i = 0; i < nfs4_ntov_map_size; i++) {
5121 5124 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5122 5125 continue;
5123 5126 }
5124 5127 /*
5125 5128 * If setattr, must be a writable attr.
5126 5129 * If verify/nverify, must be a readable attr.
5127 5130 */
5128 5131 if ((error = (*nfs4_ntov_map[i].sv_getit)(
5129 5132 NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5130 5133 /*
5131 5134 * Client tries to set/verify an
5132 5135 * unsupported attribute, tries to set
5133 5136 * a read only attr or verify a write
5134 5137 * only one - error!
5135 5138 */
5136 5139 break;
5137 5140 }
5138 5141 /*
5139 5142 * Decode the attribute to set/verify
5140 5143 */
5141 5144 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5142 5145 &xdr, resp ? resp : NULL, na);
5143 5146 if (error)
5144 5147 break;
5145 5148 *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5146 5149 na++;
5147 5150 (ntovp->attrcnt)++;
5148 5151 if (nfs4_ntov_map[i].vfsstat)
5149 5152 ntovp->vfsstat = TRUE;
5150 5153 }
5151 5154
5152 5155 if (error != 0)
5153 5156 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5154 5157 puterrno4(error));
5155 5158 /* xdrmem_destroy(&xdrs); */ /* NO-OP */
5156 5159 return (status);
5157 5160 }
5158 5161
5159 5162 static nfsstat4
5160 5163 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5161 5164 stateid4 *stateid)
5162 5165 {
5163 5166 int error = 0;
5164 5167 struct nfs4_svgetit_arg sarg;
5165 5168 bool_t trunc;
5166 5169
5167 5170 nfsstat4 status = NFS4_OK;
5168 5171 cred_t *cr = cs->cr;
5169 5172 vnode_t *vp = cs->vp;
5170 5173 struct nfs4_ntov_table ntov;
5171 5174 struct statvfs64 sb;
5172 5175 struct vattr bva;
5173 5176 struct flock64 bf;
5174 5177 int in_crit = 0;
5175 5178 uint_t saved_mask = 0;
5176 5179 caller_context_t ct;
5177 5180
5178 5181 *resp = 0;
5179 5182 sarg.sbp = &sb;
5180 5183 sarg.is_referral = B_FALSE;
5181 5184 nfs4_ntov_table_init(&ntov);
5182 5185 status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5183 5186 NFS4ATTR_SETIT);
5184 5187 if (status != NFS4_OK) {
5185 5188 /*
5186 5189 * failed set attrs
5187 5190 */
5188 5191 goto done;
5189 5192 }
5190 5193 if ((sarg.vap->va_mask == 0) &&
5191 5194 (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5192 5195 /*
5193 5196 * no further work to be done
5194 5197 */
5195 5198 goto done;
5196 5199 }
5197 5200
5198 5201 /*
5199 5202 * If we got a request to set the ACL and the MODE, only
5200 5203 * allow changing VSUID, VSGID, and VSVTX. Attempting
5201 5204 * to change any other bits, along with setting an ACL,
5202 5205 * gives NFS4ERR_INVAL.
5203 5206 */
5204 5207 if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5205 5208 (fattrp->attrmask & FATTR4_MODE_MASK)) {
5206 5209 vattr_t va;
5207 5210
5208 5211 va.va_mask = AT_MODE;
5209 5212 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5210 5213 if (error) {
5211 5214 status = puterrno4(error);
5212 5215 goto done;
5213 5216 }
5214 5217 if ((sarg.vap->va_mode ^ va.va_mode) &
5215 5218 ~(VSUID | VSGID | VSVTX)) {
5216 5219 status = NFS4ERR_INVAL;
5217 5220 goto done;
5218 5221 }
5219 5222 }
5220 5223
5221 5224 /* Check stateid only if size has been set */
5222 5225 if (sarg.vap->va_mask & AT_SIZE) {
5223 5226 trunc = (sarg.vap->va_size == 0);
5224 5227 status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5225 5228 trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
5226 5229 if (status != NFS4_OK)
5227 5230 goto done;
5228 5231 } else {
5229 5232 ct.cc_sysid = 0;
5230 5233 ct.cc_pid = 0;
5231 5234 ct.cc_caller_id = nfs4_srv_caller_id;
5232 5235 ct.cc_flags = CC_DONTBLOCK;
5233 5236 }
5234 5237
5235 5238 /* XXX start of possible race with delegations */
5236 5239
5237 5240 /*
5238 5241 * We need to specially handle size changes because it is
5239 5242 * possible for the client to create a file with read-only
5240 5243 * modes, but with the file opened for writing. If the client
5241 5244 * then tries to set the file size, e.g. ftruncate(3C),
5242 5245 * fcntl(F_FREESP), the normal access checking done in
5243 5246 * VOP_SETATTR would prevent the client from doing it even though
5244 5247 * it should be allowed to do so. To get around this, we do the
5245 5248 * access checking for ourselves and use VOP_SPACE which doesn't
5246 5249 * do the access checking.
5247 5250 * Also the client should not be allowed to change the file
5248 5251 * size if there is a conflicting non-blocking mandatory lock in
5249 5252 * the region of the change.
5250 5253 */
5251 5254 if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5252 5255 u_offset_t offset;
5253 5256 ssize_t length;
5254 5257
5255 5258 /*
5256 5259 * ufs_setattr clears AT_SIZE from vap->va_mask, but
5257 5260 * before returning, sarg.vap->va_mask is used to
5258 5261 * generate the setattr reply bitmap. We also clear
5259 5262 * AT_SIZE below before calling VOP_SPACE. For both
5260 5263 * of these cases, the va_mask needs to be saved here
5261 5264 * and restored after calling VOP_SETATTR.
5262 5265 */
5263 5266 saved_mask = sarg.vap->va_mask;
5264 5267
5265 5268 /*
5266 5269 * Check any possible conflict due to NBMAND locks.
5267 5270 * Get into critical region before VOP_GETATTR, so the
5268 5271 * size attribute is valid when checking conflicts.
5269 5272 */
5270 5273 if (nbl_need_check(vp)) {
5271 5274 nbl_start_crit(vp, RW_READER);
5272 5275 in_crit = 1;
5273 5276 }
5274 5277
5275 5278 bva.va_mask = AT_UID|AT_SIZE;
5276 5279 if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
5277 5280 status = puterrno4(error);
5278 5281 goto done;
5279 5282 }
5280 5283
5281 5284 if (in_crit) {
5282 5285 if (sarg.vap->va_size < bva.va_size) {
5283 5286 offset = sarg.vap->va_size;
5284 5287 length = bva.va_size - sarg.vap->va_size;
5285 5288 } else {
5286 5289 offset = bva.va_size;
5287 5290 length = sarg.vap->va_size - bva.va_size;
5288 5291 }
5289 5292 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5290 5293 &ct)) {
5291 5294 status = NFS4ERR_LOCKED;
5292 5295 goto done;
5293 5296 }
5294 5297 }
5295 5298
5296 5299 if (crgetuid(cr) == bva.va_uid) {
5297 5300 sarg.vap->va_mask &= ~AT_SIZE;
5298 5301 bf.l_type = F_WRLCK;
5299 5302 bf.l_whence = 0;
5300 5303 bf.l_start = (off64_t)sarg.vap->va_size;
5301 5304 bf.l_len = 0;
5302 5305 bf.l_sysid = 0;
5303 5306 bf.l_pid = 0;
5304 5307 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5305 5308 (offset_t)sarg.vap->va_size, cr, &ct);
5306 5309 }
5307 5310 }
5308 5311
5309 5312 if (!error && sarg.vap->va_mask != 0)
5310 5313 error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5311 5314
5312 5315 /* restore va_mask -- ufs_setattr clears AT_SIZE */
5313 5316 if (saved_mask & AT_SIZE)
5314 5317 sarg.vap->va_mask |= AT_SIZE;
5315 5318
5316 5319 /*
5317 5320 * If an ACL was being set, it has been delayed until now,
5318 5321 * in order to set the mode (via the VOP_SETATTR() above) first.
5319 5322 */
5320 5323 if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5321 5324 int i;
5322 5325
5323 5326 for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5324 5327 if (ntov.amap[i] == FATTR4_ACL)
5325 5328 break;
5326 5329 if (i < NFS4_MAXNUM_ATTRS) {
5327 5330 error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5328 5331 NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5329 5332 if (error == 0) {
5330 5333 *resp |= FATTR4_ACL_MASK;
5331 5334 } else if (error == ENOTSUP) {
5332 5335 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5333 5336 status = NFS4ERR_ATTRNOTSUPP;
5334 5337 goto done;
5335 5338 }
5336 5339 } else {
5337 5340 NFS4_DEBUG(rfs4_debug,
5338 5341 (CE_NOTE, "do_rfs4_op_setattr: "
5339 5342 "unable to find ACL in fattr4"));
5340 5343 error = EINVAL;
5341 5344 }
5342 5345 }
5343 5346
5344 5347 if (error) {
5345 5348 /* check if a monitor detected a delegation conflict */
5346 5349 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5347 5350 status = NFS4ERR_DELAY;
5348 5351 else
5349 5352 status = puterrno4(error);
5350 5353
5351 5354 /*
5352 5355 * Set the response bitmap when setattr failed.
5353 5356 * If VOP_SETATTR partially succeeded, test by doing a
5354 5357 * VOP_GETATTR on the object and comparing the data
5355 5358 * to the setattr arguments.
5356 5359 */
5357 5360 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5358 5361 } else {
5359 5362 /*
5360 5363 * Force modified metadata out to stable storage.
5361 5364 */
5362 5365 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5363 5366 /*
5364 5367 * Set response bitmap
5365 5368 */
5366 5369 nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5367 5370 }
5368 5371
5369 5372 /* Return early and already have a NFSv4 error */
5370 5373 done:
5371 5374 /*
5372 5375 * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5373 5376 * conversion sets both readable and writeable NFS4 attrs
5374 5377 * for AT_MTIME and AT_ATIME. The line below masks out
5375 5378 * unrequested attrs from the setattr result bitmap. This
5376 5379 * is placed after the done: label to catch the ATTRNOTSUPP
5377 5380 * case.
5378 5381 */
5379 5382 *resp &= fattrp->attrmask;
5380 5383
5381 5384 if (in_crit)
5382 5385 nbl_end_crit(vp);
5383 5386
5384 5387 nfs4_ntov_table_free(&ntov, &sarg);
5385 5388
5386 5389 return (status);
5387 5390 }
5388 5391
5389 5392 /* ARGSUSED */
5390 5393 static void
5391 5394 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5392 5395 struct compound_state *cs)
5393 5396 {
5394 5397 SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5395 5398 SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5396 5399 bslabel_t *clabel;
5397 5400
5398 5401 DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5399 5402 SETATTR4args *, args);
5400 5403
5401 5404 if (cs->vp == NULL) {
5402 5405 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5403 5406 goto out;
5404 5407 }
5405 5408
5406 5409 /*
5407 5410 * If there is an unshared filesystem mounted on this vnode,
5408 5411 * do not allow to setattr on this vnode.
5409 5412 */
5410 5413 if (vn_ismntpt(cs->vp)) {
5411 5414 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5412 5415 goto out;
5413 5416 }
5414 5417
5415 5418 resp->attrsset = 0;
5416 5419
5417 5420 if (rdonly4(req, cs)) {
5418 5421 *cs->statusp = resp->status = NFS4ERR_ROFS;
5419 5422 goto out;
5420 5423 }
5421 5424
5422 5425 /* check label before setting attributes */
5423 5426 if (is_system_labeled()) {
5424 5427 ASSERT(req->rq_label != NULL);
5425 5428 clabel = req->rq_label;
5426 5429 DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5427 5430 "got client label from request(1)",
5428 5431 struct svc_req *, req);
5429 5432 if (!blequal(&l_admin_low->tsl_label, clabel)) {
5430 5433 if (!do_rfs_label_check(clabel, cs->vp,
5431 5434 EQUALITY_CHECK, cs->exi)) {
5432 5435 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5433 5436 goto out;
5434 5437 }
5435 5438 }
5436 5439 }
5437 5440
5438 5441 *cs->statusp = resp->status =
5439 5442 do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5440 5443 &args->stateid);
5441 5444
5442 5445 out:
5443 5446 DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5444 5447 SETATTR4res *, resp);
5445 5448 }
5446 5449
5447 5450 /* ARGSUSED */
5448 5451 static void
5449 5452 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5450 5453 struct compound_state *cs)
5451 5454 {
5452 5455 /*
5453 5456 * verify and nverify are exactly the same, except that nverify
5454 5457 * succeeds when some argument changed, and verify succeeds
5455 5458 * when none changed.
5456 5459 */
5457 5460
5458 5461 VERIFY4args *args = &argop->nfs_argop4_u.opverify;
5459 5462 VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5460 5463
5461 5464 int error;
5462 5465 struct nfs4_svgetit_arg sarg;
5463 5466 struct statvfs64 sb;
5464 5467 struct nfs4_ntov_table ntov;
5465 5468
5466 5469 DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5467 5470 VERIFY4args *, args);
5468 5471
5469 5472 if (cs->vp == NULL) {
5470 5473 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5471 5474 goto out;
5472 5475 }
5473 5476
5474 5477 sarg.sbp = &sb;
5475 5478 sarg.is_referral = B_FALSE;
5476 5479 nfs4_ntov_table_init(&ntov);
5477 5480 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5478 5481 &sarg, &ntov, NFS4ATTR_VERIT);
5479 5482 if (resp->status != NFS4_OK) {
5480 5483 /*
5481 5484 * do_rfs4_set_attrs will try to verify systemwide attrs,
5482 5485 * so could return -1 for "no match".
5483 5486 */
5484 5487 if (resp->status == -1)
5485 5488 resp->status = NFS4ERR_NOT_SAME;
5486 5489 goto done;
5487 5490 }
5488 5491 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5489 5492 switch (error) {
5490 5493 case 0:
5491 5494 resp->status = NFS4_OK;
5492 5495 break;
5493 5496 case -1:
5494 5497 resp->status = NFS4ERR_NOT_SAME;
5495 5498 break;
5496 5499 default:
5497 5500 resp->status = puterrno4(error);
5498 5501 break;
5499 5502 }
5500 5503 done:
5501 5504 *cs->statusp = resp->status;
5502 5505 nfs4_ntov_table_free(&ntov, &sarg);
5503 5506 out:
5504 5507 DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5505 5508 VERIFY4res *, resp);
5506 5509 }
5507 5510
5508 5511 /* ARGSUSED */
5509 5512 static void
5510 5513 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5511 5514 struct compound_state *cs)
5512 5515 {
5513 5516 /*
5514 5517 * verify and nverify are exactly the same, except that nverify
5515 5518 * succeeds when some argument changed, and verify succeeds
5516 5519 * when none changed.
5517 5520 */
5518 5521
5519 5522 NVERIFY4args *args = &argop->nfs_argop4_u.opnverify;
5520 5523 NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5521 5524
5522 5525 int error;
5523 5526 struct nfs4_svgetit_arg sarg;
5524 5527 struct statvfs64 sb;
5525 5528 struct nfs4_ntov_table ntov;
5526 5529
5527 5530 DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5528 5531 NVERIFY4args *, args);
5529 5532
5530 5533 if (cs->vp == NULL) {
5531 5534 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5532 5535 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5533 5536 NVERIFY4res *, resp);
5534 5537 return;
5535 5538 }
5536 5539 sarg.sbp = &sb;
5537 5540 sarg.is_referral = B_FALSE;
5538 5541 nfs4_ntov_table_init(&ntov);
5539 5542 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5540 5543 &sarg, &ntov, NFS4ATTR_VERIT);
5541 5544 if (resp->status != NFS4_OK) {
5542 5545 /*
5543 5546 * do_rfs4_set_attrs will try to verify systemwide attrs,
5544 5547 * so could return -1 for "no match".
5545 5548 */
5546 5549 if (resp->status == -1)
5547 5550 resp->status = NFS4_OK;
5548 5551 goto done;
5549 5552 }
5550 5553 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5551 5554 switch (error) {
5552 5555 case 0:
5553 5556 resp->status = NFS4ERR_SAME;
5554 5557 break;
5555 5558 case -1:
5556 5559 resp->status = NFS4_OK;
5557 5560 break;
5558 5561 default:
5559 5562 resp->status = puterrno4(error);
5560 5563 break;
5561 5564 }
5562 5565 done:
5563 5566 *cs->statusp = resp->status;
5564 5567 nfs4_ntov_table_free(&ntov, &sarg);
5565 5568
5566 5569 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5567 5570 NVERIFY4res *, resp);
5568 5571 }
5569 5572
5570 5573 /*
5571 5574 * XXX - This should live in an NFS header file.
5572 5575 */
5573 5576 #define MAX_IOVECS 12
5574 5577
5575 5578 /* ARGSUSED */
5576 5579 static void
5577 5580 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5578 5581 struct compound_state *cs)
5579 5582 {
5580 5583 WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5581 5584 WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5582 5585 int error;
5583 5586 vnode_t *vp;
5584 5587 struct vattr bva;
5585 5588 u_offset_t rlimit;
5586 5589 struct uio uio;
5587 5590 struct iovec iov[MAX_IOVECS];
5588 5591 struct iovec *iovp;
5589 5592 int iovcnt;
5590 5593 int ioflag;
5591 5594 cred_t *savecred, *cr;
5592 5595 bool_t *deleg = &cs->deleg;
5593 5596 nfsstat4 stat;
5594 5597 int in_crit = 0;
5595 5598 caller_context_t ct;
5596 5599 nfs4_srv_t *nsrv4;
5597 5600
5598 5601 DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5599 5602 WRITE4args *, args);
5600 5603
5601 5604 vp = cs->vp;
5602 5605 if (vp == NULL) {
5603 5606 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5604 5607 goto out;
5605 5608 }
5606 5609 if (cs->access == CS_ACCESS_DENIED) {
5607 5610 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5608 5611 goto out;
5609 5612 }
5610 5613
5611 5614 cr = cs->cr;
5612 5615
5613 5616 if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5614 5617 deleg, TRUE, &ct)) != NFS4_OK) {
5615 5618 *cs->statusp = resp->status = stat;
5616 5619 goto out;
5617 5620 }
5618 5621
5619 5622 /*
5620 5623 * We have to enter the critical region before calling VOP_RWLOCK
5621 5624 * to avoid a deadlock with ufs.
5622 5625 */
5623 5626 if (nbl_need_check(vp)) {
5624 5627 nbl_start_crit(vp, RW_READER);
5625 5628 in_crit = 1;
5626 5629 if (nbl_conflict(vp, NBL_WRITE,
5627 5630 args->offset, args->data_len, 0, &ct)) {
5628 5631 *cs->statusp = resp->status = NFS4ERR_LOCKED;
5629 5632 goto out;
5630 5633 }
5631 5634 }
5632 5635
5633 5636 bva.va_mask = AT_MODE | AT_UID;
5634 5637 error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5635 5638
5636 5639 /*
5637 5640 * If we can't get the attributes, then we can't do the
5638 5641 * right access checking. So, we'll fail the request.
5639 5642 */
5640 5643 if (error) {
5641 5644 *cs->statusp = resp->status = puterrno4(error);
5642 5645 goto out;
5643 5646 }
5644 5647
5645 5648 if (rdonly4(req, cs)) {
5646 5649 *cs->statusp = resp->status = NFS4ERR_ROFS;
5647 5650 goto out;
5648 5651 }
5649 5652
5650 5653 if (vp->v_type != VREG) {
5651 5654 *cs->statusp = resp->status =
5652 5655 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5653 5656 goto out;
5654 5657 }
5655 5658
5656 5659 if (crgetuid(cr) != bva.va_uid &&
|
↓ open down ↓ |
4126 lines elided |
↑ open up ↑ |
5657 5660 (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5658 5661 *cs->statusp = resp->status = puterrno4(error);
5659 5662 goto out;
5660 5663 }
5661 5664
5662 5665 if (MANDLOCK(vp, bva.va_mode)) {
5663 5666 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5664 5667 goto out;
5665 5668 }
5666 5669
5667 - nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
5670 + nsrv4 = nfs4_get_srv();
5668 5671 if (args->data_len == 0) {
5669 5672 *cs->statusp = resp->status = NFS4_OK;
5670 5673 resp->count = 0;
5671 5674 resp->committed = args->stable;
5672 5675 resp->writeverf = nsrv4->write4verf;
5673 5676 goto out;
5674 5677 }
5675 5678
5676 5679 if (args->mblk != NULL) {
5677 5680 mblk_t *m;
5678 5681 uint_t bytes, round_len;
5679 5682
5680 5683 iovcnt = 0;
5681 5684 bytes = 0;
5682 5685 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5683 5686 for (m = args->mblk;
5684 5687 m != NULL && bytes < round_len;
5685 5688 m = m->b_cont) {
5686 5689 iovcnt++;
5687 5690 bytes += MBLKL(m);
5688 5691 }
5689 5692 #ifdef DEBUG
5690 5693 /* should have ended on an mblk boundary */
5691 5694 if (bytes != round_len) {
5692 5695 printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5693 5696 bytes, round_len, args->data_len);
5694 5697 printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5695 5698 (void *)args->mblk, (void *)m);
5696 5699 ASSERT(bytes == round_len);
5697 5700 }
5698 5701 #endif
5699 5702 if (iovcnt <= MAX_IOVECS) {
5700 5703 iovp = iov;
5701 5704 } else {
5702 5705 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5703 5706 }
5704 5707 mblk_to_iov(args->mblk, iovcnt, iovp);
5705 5708 } else if (args->rlist != NULL) {
5706 5709 iovcnt = 1;
5707 5710 iovp = iov;
5708 5711 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5709 5712 iovp->iov_len = args->data_len;
5710 5713 } else {
5711 5714 iovcnt = 1;
5712 5715 iovp = iov;
5713 5716 iovp->iov_base = args->data_val;
5714 5717 iovp->iov_len = args->data_len;
5715 5718 }
5716 5719
5717 5720 uio.uio_iov = iovp;
5718 5721 uio.uio_iovcnt = iovcnt;
5719 5722
5720 5723 uio.uio_segflg = UIO_SYSSPACE;
5721 5724 uio.uio_extflg = UIO_COPY_DEFAULT;
5722 5725 uio.uio_loffset = args->offset;
5723 5726 uio.uio_resid = args->data_len;
5724 5727 uio.uio_llimit = curproc->p_fsz_ctl;
5725 5728 rlimit = uio.uio_llimit - args->offset;
5726 5729 if (rlimit < (u_offset_t)uio.uio_resid)
5727 5730 uio.uio_resid = (int)rlimit;
5728 5731
5729 5732 if (args->stable == UNSTABLE4)
5730 5733 ioflag = 0;
5731 5734 else if (args->stable == FILE_SYNC4)
5732 5735 ioflag = FSYNC;
5733 5736 else if (args->stable == DATA_SYNC4)
5734 5737 ioflag = FDSYNC;
5735 5738 else {
5736 5739 if (iovp != iov)
5737 5740 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5738 5741 *cs->statusp = resp->status = NFS4ERR_INVAL;
5739 5742 goto out;
5740 5743 }
5741 5744
5742 5745 /*
5743 5746 * We're changing creds because VM may fault and we need
5744 5747 * the cred of the current thread to be used if quota
5745 5748 * checking is enabled.
5746 5749 */
5747 5750 savecred = curthread->t_cred;
5748 5751 curthread->t_cred = cr;
5749 5752 error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5750 5753 curthread->t_cred = savecred;
5751 5754
5752 5755 if (iovp != iov)
5753 5756 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5754 5757
5755 5758 if (error) {
5756 5759 *cs->statusp = resp->status = puterrno4(error);
5757 5760 goto out;
5758 5761 }
5759 5762
5760 5763 *cs->statusp = resp->status = NFS4_OK;
5761 5764 resp->count = args->data_len - uio.uio_resid;
5762 5765
5763 5766 if (ioflag == 0)
5764 5767 resp->committed = UNSTABLE4;
5765 5768 else
5766 5769 resp->committed = FILE_SYNC4;
5767 5770
5768 5771 resp->writeverf = nsrv4->write4verf;
5769 5772
5770 5773 out:
5771 5774 if (in_crit)
5772 5775 nbl_end_crit(vp);
5773 5776
5774 5777 DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5775 5778 WRITE4res *, resp);
5776 5779 }
5777 5780
5778 5781
5779 5782 /* XXX put in a header file */
5780 5783 extern int sec_svc_getcred(struct svc_req *, cred_t *, caddr_t *, int *);
5781 5784
5782 5785 void
5783 5786 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5784 5787 struct svc_req *req, cred_t *cr, int *rv)
5785 5788 {
5786 5789 uint_t i;
5787 5790 struct compound_state cs;
5788 5791 nfs4_srv_t *nsrv4;
5789 5792 nfs_export_t *ne = nfs_get_export();
5790 5793
5791 5794 if (rv != NULL)
5792 5795 *rv = 0;
5793 5796 rfs4_init_compound_state(&cs);
5794 5797 /*
5795 5798 * Form a reply tag by copying over the reqeuest tag.
5796 5799 */
5797 5800 resp->tag.utf8string_val =
5798 5801 kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5799 5802 resp->tag.utf8string_len = args->tag.utf8string_len;
5800 5803 bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5801 5804 resp->tag.utf8string_len);
5802 5805
5803 5806 cs.statusp = &resp->status;
5804 5807 cs.req = req;
5805 5808 resp->array = NULL;
5806 5809 resp->array_len = 0;
5807 5810
5808 5811 /*
5809 5812 * XXX for now, minorversion should be zero
5810 5813 */
5811 5814 if (args->minorversion != NFS4_MINORVERSION) {
5812 5815 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5813 5816 &cs, COMPOUND4args *, args);
5814 5817 resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5815 5818 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5816 5819 &cs, COMPOUND4res *, resp);
5817 5820 return;
5818 5821 }
5819 5822
5820 5823 if (args->array_len == 0) {
5821 5824 resp->status = NFS4_OK;
5822 5825 return;
5823 5826 }
5824 5827
5825 5828 ASSERT(exi == NULL);
5826 5829 ASSERT(cr == NULL);
5827 5830
5828 5831 cr = crget();
5829 5832 ASSERT(cr != NULL);
5830 5833
5831 5834 if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5832 5835 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5833 5836 &cs, COMPOUND4args *, args);
5834 5837 crfree(cr);
5835 5838 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5836 5839 &cs, COMPOUND4res *, resp);
|
↓ open down ↓ |
159 lines elided |
↑ open up ↑ |
5837 5840 svcerr_badcred(req->rq_xprt);
5838 5841 if (rv != NULL)
5839 5842 *rv = 1;
5840 5843 return;
5841 5844 }
5842 5845 resp->array_len = args->array_len;
5843 5846 resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5844 5847 KM_SLEEP);
5845 5848
5846 5849 cs.basecr = cr;
5847 - nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
5850 + nsrv4 = nfs4_get_srv();
5848 5851
5849 5852 DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
5850 5853 COMPOUND4args *, args);
5851 5854
5852 5855 /*
5853 5856 * For now, NFS4 compound processing must be protected by
5854 5857 * exported_lock because it can access more than one exportinfo
5855 5858 * per compound and share/unshare can now change multiple
5856 5859 * exinfo structs. The NFS2/3 code only refs 1 exportinfo
5857 5860 * per proc (excluding public exinfo), and exi_count design
5858 5861 * is sufficient to protect concurrent execution of NFS2/3
5859 5862 * ops along with unexport. This lock will be removed as
5860 5863 * part of the NFSv4 phase 2 namespace redesign work.
5861 5864 */
5862 5865 rw_enter(&ne->exported_lock, RW_READER);
5863 5866
5864 5867 /*
5865 5868 * If this is the first compound we've seen, we need to start all
5866 5869 * new instances' grace periods.
5867 5870 */
5868 5871 if (nsrv4->seen_first_compound == 0) {
5869 5872 rfs4_grace_start_new(nsrv4);
5870 5873 /*
5871 5874 * This must be set after rfs4_grace_start_new(), otherwise
5872 5875 * another thread could proceed past here before the former
5873 5876 * is finished.
5874 5877 */
5875 5878 nsrv4->seen_first_compound = 1;
5876 5879 }
5877 5880
5878 5881 for (i = 0; i < args->array_len && cs.cont; i++) {
5879 5882 nfs_argop4 *argop;
5880 5883 nfs_resop4 *resop;
5881 5884 uint_t op;
5882 5885
5883 5886 argop = &args->array[i];
5884 5887 resop = &resp->array[i];
5885 5888 resop->resop = argop->argop;
5886 5889 op = (uint_t)resop->resop;
5887 5890
5888 5891 if (op < rfsv4disp_cnt) {
5889 5892 /*
5890 5893 * Count the individual ops here; NULL and COMPOUND
5891 5894 * are counted in common_dispatch()
5892 5895 */
5893 5896 rfsproccnt_v4_ptr[op].value.ui64++;
5894 5897
5895 5898 NFS4_DEBUG(rfs4_debug > 1,
5896 5899 (CE_NOTE, "Executing %s", rfs4_op_string[op]));
5897 5900 (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5898 5901 NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
5899 5902 rfs4_op_string[op], *cs.statusp));
5900 5903 if (*cs.statusp != NFS4_OK)
5901 5904 cs.cont = FALSE;
5902 5905 } else {
5903 5906 /*
5904 5907 * This is effectively dead code since XDR code
5905 5908 * will have already returned BADXDR if op doesn't
5906 5909 * decode to legal value. This only done for a
5907 5910 * day when XDR code doesn't verify v4 opcodes.
5908 5911 */
5909 5912 op = OP_ILLEGAL;
5910 5913 rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
5911 5914
5912 5915 rfs4_op_illegal(argop, resop, req, &cs);
5913 5916 cs.cont = FALSE;
5914 5917 }
5915 5918
5916 5919 /*
5917 5920 * If not at last op, and if we are to stop, then
5918 5921 * compact the results array.
5919 5922 */
5920 5923 if ((i + 1) < args->array_len && !cs.cont) {
5921 5924 nfs_resop4 *new_res = kmem_alloc(
5922 5925 (i+1) * sizeof (nfs_resop4), KM_SLEEP);
5923 5926 bcopy(resp->array,
5924 5927 new_res, (i+1) * sizeof (nfs_resop4));
5925 5928 kmem_free(resp->array,
5926 5929 args->array_len * sizeof (nfs_resop4));
5927 5930
5928 5931 resp->array_len = i + 1;
5929 5932 resp->array = new_res;
5930 5933 }
5931 5934 }
5932 5935
5933 5936 rw_exit(&ne->exported_lock);
5934 5937
5935 5938 /*
5936 5939 * clear exportinfo and vnode fields from compound_state before dtrace
5937 5940 * probe, to avoid tracing residual values for path and share path.
5938 5941 */
5939 5942 if (cs.vp)
5940 5943 VN_RELE(cs.vp);
5941 5944 if (cs.saved_vp)
5942 5945 VN_RELE(cs.saved_vp);
5943 5946 cs.exi = cs.saved_exi = NULL;
5944 5947 cs.vp = cs.saved_vp = NULL;
5945 5948
5946 5949 DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
5947 5950 COMPOUND4res *, resp);
5948 5951
5949 5952 if (cs.saved_fh.nfs_fh4_val)
5950 5953 kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5951 5954
5952 5955 if (cs.basecr)
5953 5956 crfree(cs.basecr);
5954 5957 if (cs.cr)
5955 5958 crfree(cs.cr);
5956 5959 /*
5957 5960 * done with this compound request, free the label
5958 5961 */
5959 5962
5960 5963 if (req->rq_label != NULL) {
5961 5964 kmem_free(req->rq_label, sizeof (bslabel_t));
5962 5965 req->rq_label = NULL;
5963 5966 }
5964 5967 }
5965 5968
5966 5969 /*
5967 5970 * XXX because of what appears to be duplicate calls to rfs4_compound_free
5968 5971 * XXX zero out the tag and array values. Need to investigate why the
5969 5972 * XXX calls occur, but at least prevent the panic for now.
5970 5973 */
5971 5974 void
5972 5975 rfs4_compound_free(COMPOUND4res *resp)
5973 5976 {
5974 5977 uint_t i;
5975 5978
5976 5979 if (resp->tag.utf8string_val) {
5977 5980 UTF8STRING_FREE(resp->tag)
5978 5981 }
5979 5982
5980 5983 for (i = 0; i < resp->array_len; i++) {
5981 5984 nfs_resop4 *resop;
5982 5985 uint_t op;
5983 5986
5984 5987 resop = &resp->array[i];
5985 5988 op = (uint_t)resop->resop;
5986 5989 if (op < rfsv4disp_cnt) {
5987 5990 (*rfsv4disptab[op].dis_resfree)(resop);
5988 5991 }
5989 5992 }
5990 5993 if (resp->array != NULL) {
5991 5994 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
5992 5995 }
5993 5996 }
5994 5997
5995 5998 /*
5996 5999 * Process the value of the compound request rpc flags, as a bit-AND
5997 6000 * of the individual per-op flags (idempotent, allowork, publicfh_ok)
5998 6001 */
5999 6002 void
6000 6003 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
6001 6004 {
6002 6005 int i;
6003 6006 int flag = RPC_ALL;
6004 6007
6005 6008 for (i = 0; flag && i < args->array_len; i++) {
6006 6009 uint_t op;
6007 6010
6008 6011 op = (uint_t)args->array[i].argop;
6009 6012
6010 6013 if (op < rfsv4disp_cnt)
6011 6014 flag &= rfsv4disptab[op].dis_flags;
6012 6015 else
6013 6016 flag = 0;
6014 6017 }
6015 6018 *flagp = flag;
6016 6019 }
6017 6020
6018 6021 nfsstat4
6019 6022 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
6020 6023 {
6021 6024 nfsstat4 e;
6022 6025
6023 6026 rfs4_dbe_lock(cp->rc_dbe);
6024 6027
6025 6028 if (cp->rc_sysidt != LM_NOSYSID) {
6026 6029 *sp = cp->rc_sysidt;
6027 6030 e = NFS4_OK;
6028 6031
6029 6032 } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
6030 6033 *sp = cp->rc_sysidt;
6031 6034 e = NFS4_OK;
6032 6035
6033 6036 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6034 6037 "rfs4_client_sysid: allocated 0x%x\n", *sp));
6035 6038 } else
6036 6039 e = NFS4ERR_DELAY;
6037 6040
6038 6041 rfs4_dbe_unlock(cp->rc_dbe);
6039 6042 return (e);
6040 6043 }
6041 6044
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug helper: log a one-line description of a record-lock request.
 * str is a caller-supplied prefix, operation is the fcntl-style command
 * and flk the lock being described.  A zero l_len is printed as ~0.
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
	char *op, *type;

	switch (operation) {
	case F_GETLK: op = "F_GETLK";
		break;
	case F_SETLK: op = "F_SETLK";
		break;
	case F_SETLK_NBMAND: op = "F_SETLK_NBMAND";
		break;
	default: op = "F_UNKNOWN";
		break;
	}
	switch (flk->l_type) {
	case F_UNLCK: type = "F_UNLCK";
		break;
	case F_RDLCK: type = "F_RDLCK";
		break;
	case F_WRLCK: type = "F_WRLCK";
		break;
	default: type = "F_UNKNOWN";
		break;
	}

	ASSERT(flk->l_whence == 0);
	cmn_err(CE_NOTE, "%s: %s, type = %s, off = %llx len = %llx pid = %d",
	    str, op, type, (longlong_t)flk->l_start,
	    flk->l_len ? (longlong_t)flk->l_len : ~0LL, flk->l_pid);
}

/*
 * Wrapped in do/while(0) so the macro is a single statement and cannot
 * capture a following "else" when used inside an unbraced if.
 */
#define	LOCK_PRINT(d, s, t, f)			\
	do {					\
		if (d)				\
			lock_print(s, t, f);	\
	} while (0)
#else
#define	LOCK_PRINT(d, s, t, f)
#endif
6078 6081
6079 6082 /*ARGSUSED*/
6080 6083 static bool_t
6081 6084 creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
6082 6085 {
6083 6086 return (TRUE);
6084 6087 }
6085 6088
6086 6089 /*
6087 6090 * Look up the pathname using the vp in cs as the directory vnode.
6088 6091 * cs->vp will be the vnode for the file on success
6089 6092 */
6090 6093
6091 6094 static nfsstat4
6092 6095 rfs4_lookup(component4 *component, struct svc_req *req,
6093 6096 struct compound_state *cs)
6094 6097 {
6095 6098 char *nm;
6096 6099 uint32_t len;
6097 6100 nfsstat4 status;
6098 6101 struct sockaddr *ca;
6099 6102 char *name;
6100 6103
6101 6104 if (cs->vp == NULL) {
6102 6105 return (NFS4ERR_NOFILEHANDLE);
6103 6106 }
6104 6107 if (cs->vp->v_type != VDIR) {
6105 6108 return (NFS4ERR_NOTDIR);
6106 6109 }
6107 6110
6108 6111 status = utf8_dir_verify(component);
6109 6112 if (status != NFS4_OK)
6110 6113 return (status);
6111 6114
6112 6115 nm = utf8_to_fn(component, &len, NULL);
6113 6116 if (nm == NULL) {
6114 6117 return (NFS4ERR_INVAL);
6115 6118 }
6116 6119
6117 6120 if (len > MAXNAMELEN) {
6118 6121 kmem_free(nm, len);
6119 6122 return (NFS4ERR_NAMETOOLONG);
6120 6123 }
6121 6124
6122 6125 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6123 6126 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6124 6127 MAXPATHLEN + 1);
6125 6128
6126 6129 if (name == NULL) {
6127 6130 kmem_free(nm, len);
6128 6131 return (NFS4ERR_INVAL);
6129 6132 }
6130 6133
6131 6134 status = do_rfs4_op_lookup(name, req, cs);
6132 6135
6133 6136 if (name != nm)
6134 6137 kmem_free(name, MAXPATHLEN + 1);
6135 6138
6136 6139 kmem_free(nm, len);
6137 6140
6138 6141 return (status);
6139 6142 }
6140 6143
6141 6144 static nfsstat4
6142 6145 rfs4_lookupfile(component4 *component, struct svc_req *req,
6143 6146 struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6144 6147 {
6145 6148 nfsstat4 status;
6146 6149 vnode_t *dvp = cs->vp;
6147 6150 vattr_t bva, ava, fva;
6148 6151 int error;
6149 6152
6150 6153 /* Get "before" change value */
6151 6154 bva.va_mask = AT_CTIME|AT_SEQ;
6152 6155 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6153 6156 if (error)
6154 6157 return (puterrno4(error));
6155 6158
6156 6159 /* rfs4_lookup may VN_RELE directory */
6157 6160 VN_HOLD(dvp);
6158 6161
6159 6162 status = rfs4_lookup(component, req, cs);
6160 6163 if (status != NFS4_OK) {
6161 6164 VN_RELE(dvp);
6162 6165 return (status);
6163 6166 }
6164 6167
6165 6168 /*
6166 6169 * Get "after" change value, if it fails, simply return the
6167 6170 * before value.
6168 6171 */
6169 6172 ava.va_mask = AT_CTIME|AT_SEQ;
6170 6173 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6171 6174 ava.va_ctime = bva.va_ctime;
6172 6175 ava.va_seq = 0;
6173 6176 }
6174 6177 VN_RELE(dvp);
6175 6178
6176 6179 /*
6177 6180 * Validate the file is a file
6178 6181 */
6179 6182 fva.va_mask = AT_TYPE|AT_MODE;
6180 6183 error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6181 6184 if (error)
6182 6185 return (puterrno4(error));
6183 6186
6184 6187 if (fva.va_type != VREG) {
6185 6188 if (fva.va_type == VDIR)
6186 6189 return (NFS4ERR_ISDIR);
6187 6190 if (fva.va_type == VLNK)
6188 6191 return (NFS4ERR_SYMLINK);
6189 6192 return (NFS4ERR_INVAL);
6190 6193 }
6191 6194
6192 6195 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6193 6196 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6194 6197
6195 6198 /*
6196 6199 * It is undefined if VOP_LOOKUP will change va_seq, so
6197 6200 * cinfo.atomic = TRUE only if we have
6198 6201 * non-zero va_seq's, and they have not changed.
6199 6202 */
6200 6203 if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6201 6204 cinfo->atomic = TRUE;
6202 6205 else
6203 6206 cinfo->atomic = FALSE;
6204 6207
6205 6208 /* Check for mandatory locking */
6206 6209 cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6207 6210 return (check_open_access(access, cs, req));
6208 6211 }
6209 6212
6210 6213 static nfsstat4
6211 6214 create_vnode(vnode_t *dvp, char *nm, vattr_t *vap, createmode4 mode,
6212 6215 cred_t *cr, vnode_t **vpp, bool_t *created)
6213 6216 {
6214 6217 int error;
6215 6218 nfsstat4 status = NFS4_OK;
6216 6219 vattr_t va;
6217 6220
6218 6221 tryagain:
6219 6222
6220 6223 /*
6221 6224 * The file open mode used is VWRITE. If the client needs
6222 6225 * some other semantic, then it should do the access checking
6223 6226 * itself. It would have been nice to have the file open mode
6224 6227 * passed as part of the arguments.
6225 6228 */
6226 6229
6227 6230 *created = TRUE;
6228 6231 error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6229 6232
6230 6233 if (error) {
6231 6234 *created = FALSE;
6232 6235
6233 6236 /*
6234 6237 * If we got something other than file already exists
6235 6238 * then just return this error. Otherwise, we got
6236 6239 * EEXIST. If we were doing a GUARDED create, then
6237 6240 * just return this error. Otherwise, we need to
6238 6241 * make sure that this wasn't a duplicate of an
6239 6242 * exclusive create request.
6240 6243 *
6241 6244 * The assumption is made that a non-exclusive create
6242 6245 * request will never return EEXIST.
6243 6246 */
6244 6247
6245 6248 if (error != EEXIST || mode == GUARDED4) {
6246 6249 status = puterrno4(error);
6247 6250 return (status);
6248 6251 }
6249 6252 error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6250 6253 NULL, NULL, NULL);
6251 6254
6252 6255 if (error) {
6253 6256 /*
6254 6257 * We couldn't find the file that we thought that
6255 6258 * we just created. So, we'll just try creating
6256 6259 * it again.
6257 6260 */
6258 6261 if (error == ENOENT)
6259 6262 goto tryagain;
6260 6263
6261 6264 status = puterrno4(error);
6262 6265 return (status);
6263 6266 }
6264 6267
6265 6268 if (mode == UNCHECKED4) {
6266 6269 /* existing object must be regular file */
6267 6270 if ((*vpp)->v_type != VREG) {
6268 6271 if ((*vpp)->v_type == VDIR)
6269 6272 status = NFS4ERR_ISDIR;
6270 6273 else if ((*vpp)->v_type == VLNK)
6271 6274 status = NFS4ERR_SYMLINK;
6272 6275 else
6273 6276 status = NFS4ERR_INVAL;
6274 6277 VN_RELE(*vpp);
6275 6278 return (status);
6276 6279 }
6277 6280
6278 6281 return (NFS4_OK);
6279 6282 }
6280 6283
6281 6284 /* Check for duplicate request */
6282 6285 va.va_mask = AT_MTIME;
6283 6286 error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6284 6287 if (!error) {
6285 6288 /* We found the file */
6286 6289 const timestruc_t *mtime = &vap->va_mtime;
6287 6290
6288 6291 if (va.va_mtime.tv_sec != mtime->tv_sec ||
6289 6292 va.va_mtime.tv_nsec != mtime->tv_nsec) {
6290 6293 /* but its not our creation */
6291 6294 VN_RELE(*vpp);
6292 6295 return (NFS4ERR_EXIST);
6293 6296 }
6294 6297 *created = TRUE; /* retrans of create == created */
6295 6298 return (NFS4_OK);
6296 6299 }
6297 6300 VN_RELE(*vpp);
6298 6301 return (NFS4ERR_EXIST);
6299 6302 }
6300 6303
6301 6304 return (NFS4_OK);
6302 6305 }
6303 6306
6304 6307 static nfsstat4
6305 6308 check_open_access(uint32_t access, struct compound_state *cs,
6306 6309 struct svc_req *req)
6307 6310 {
6308 6311 int error;
6309 6312 vnode_t *vp;
6310 6313 bool_t readonly;
6311 6314 cred_t *cr = cs->cr;
6312 6315
6313 6316 /* For now we don't allow mandatory locking as per V2/V3 */
6314 6317 if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6315 6318 return (NFS4ERR_ACCESS);
6316 6319 }
6317 6320
6318 6321 vp = cs->vp;
6319 6322 ASSERT(cr != NULL && vp->v_type == VREG);
6320 6323
6321 6324 /*
6322 6325 * If the file system is exported read only and we are trying
6323 6326 * to open for write, then return NFS4ERR_ROFS
6324 6327 */
6325 6328
6326 6329 readonly = rdonly4(req, cs);
6327 6330
6328 6331 if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6329 6332 return (NFS4ERR_ROFS);
6330 6333
6331 6334 if (access & OPEN4_SHARE_ACCESS_READ) {
6332 6335 if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6333 6336 (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6334 6337 return (NFS4ERR_ACCESS);
6335 6338 }
6336 6339 }
6337 6340
6338 6341 if (access & OPEN4_SHARE_ACCESS_WRITE) {
6339 6342 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6340 6343 if (error)
6341 6344 return (NFS4ERR_ACCESS);
6342 6345 }
6343 6346
6344 6347 return (NFS4_OK);
6345 6348 }
6346 6349
6347 6350 static nfsstat4
6348 6351 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6349 6352 change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6350 6353 {
6351 6354 struct nfs4_svgetit_arg sarg;
6352 6355 struct nfs4_ntov_table ntov;
6353 6356
6354 6357 bool_t ntov_table_init = FALSE;
6355 6358 struct statvfs64 sb;
6356 6359 nfsstat4 status;
6357 6360 vnode_t *vp;
6358 6361 vattr_t bva, ava, iva, cva, *vap;
6359 6362 vnode_t *dvp;
6360 6363 timespec32_t *mtime;
6361 6364 char *nm = NULL;
6362 6365 uint_t buflen;
6363 6366 bool_t created;
6364 6367 bool_t setsize = FALSE;
6365 6368 len_t reqsize;
6366 6369 int error;
6367 6370 bool_t trunc;
6368 6371 caller_context_t ct;
6369 6372 component4 *component;
6370 6373 bslabel_t *clabel;
6371 6374 struct sockaddr *ca;
6372 6375 char *name = NULL;
6373 6376
6374 6377 sarg.sbp = &sb;
6375 6378 sarg.is_referral = B_FALSE;
6376 6379
6377 6380 dvp = cs->vp;
6378 6381
6379 6382 /* Check if the file system is read only */
6380 6383 if (rdonly4(req, cs))
6381 6384 return (NFS4ERR_ROFS);
6382 6385
6383 6386 /* check the label of including directory */
6384 6387 if (is_system_labeled()) {
6385 6388 ASSERT(req->rq_label != NULL);
6386 6389 clabel = req->rq_label;
6387 6390 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6388 6391 "got client label from request(1)",
6389 6392 struct svc_req *, req);
6390 6393 if (!blequal(&l_admin_low->tsl_label, clabel)) {
6391 6394 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6392 6395 cs->exi)) {
6393 6396 return (NFS4ERR_ACCESS);
6394 6397 }
6395 6398 }
6396 6399 }
6397 6400
6398 6401 /*
6399 6402 * Get the last component of path name in nm. cs will reference
6400 6403 * the including directory on success.
6401 6404 */
6402 6405 component = &args->open_claim4_u.file;
6403 6406 status = utf8_dir_verify(component);
6404 6407 if (status != NFS4_OK)
6405 6408 return (status);
6406 6409
6407 6410 nm = utf8_to_fn(component, &buflen, NULL);
6408 6411
6409 6412 if (nm == NULL)
6410 6413 return (NFS4ERR_RESOURCE);
6411 6414
6412 6415 if (buflen > MAXNAMELEN) {
6413 6416 kmem_free(nm, buflen);
6414 6417 return (NFS4ERR_NAMETOOLONG);
6415 6418 }
6416 6419
6417 6420 bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6418 6421 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6419 6422 if (error) {
6420 6423 kmem_free(nm, buflen);
6421 6424 return (puterrno4(error));
6422 6425 }
6423 6426
6424 6427 if (bva.va_type != VDIR) {
6425 6428 kmem_free(nm, buflen);
6426 6429 return (NFS4ERR_NOTDIR);
6427 6430 }
6428 6431
6429 6432 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6430 6433
6431 6434 switch (args->mode) {
6432 6435 case GUARDED4:
6433 6436 /*FALLTHROUGH*/
6434 6437 case UNCHECKED4:
6435 6438 nfs4_ntov_table_init(&ntov);
6436 6439 ntov_table_init = TRUE;
6437 6440
6438 6441 *attrset = 0;
6439 6442 status = do_rfs4_set_attrs(attrset,
6440 6443 &args->createhow4_u.createattrs,
6441 6444 cs, &sarg, &ntov, NFS4ATTR_SETIT);
6442 6445
6443 6446 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6444 6447 sarg.vap->va_type != VREG) {
6445 6448 if (sarg.vap->va_type == VDIR)
6446 6449 status = NFS4ERR_ISDIR;
6447 6450 else if (sarg.vap->va_type == VLNK)
6448 6451 status = NFS4ERR_SYMLINK;
6449 6452 else
6450 6453 status = NFS4ERR_INVAL;
6451 6454 }
6452 6455
6453 6456 if (status != NFS4_OK) {
6454 6457 kmem_free(nm, buflen);
6455 6458 nfs4_ntov_table_free(&ntov, &sarg);
6456 6459 *attrset = 0;
6457 6460 return (status);
6458 6461 }
6459 6462
6460 6463 vap = sarg.vap;
6461 6464 vap->va_type = VREG;
6462 6465 vap->va_mask |= AT_TYPE;
6463 6466
6464 6467 if ((vap->va_mask & AT_MODE) == 0) {
6465 6468 vap->va_mask |= AT_MODE;
6466 6469 vap->va_mode = (mode_t)0600;
6467 6470 }
6468 6471
6469 6472 if (vap->va_mask & AT_SIZE) {
6470 6473
6471 6474 /* Disallow create with a non-zero size */
6472 6475
6473 6476 if ((reqsize = sarg.vap->va_size) != 0) {
6474 6477 kmem_free(nm, buflen);
6475 6478 nfs4_ntov_table_free(&ntov, &sarg);
6476 6479 *attrset = 0;
6477 6480 return (NFS4ERR_INVAL);
6478 6481 }
6479 6482 setsize = TRUE;
6480 6483 }
6481 6484 break;
6482 6485
6483 6486 case EXCLUSIVE4:
6484 6487 /* prohibit EXCL create of named attributes */
6485 6488 if (dvp->v_flag & V_XATTRDIR) {
6486 6489 kmem_free(nm, buflen);
6487 6490 *attrset = 0;
6488 6491 return (NFS4ERR_INVAL);
6489 6492 }
6490 6493
6491 6494 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6492 6495 cva.va_type = VREG;
6493 6496 /*
6494 6497 * Ensure no time overflows. Assumes underlying
6495 6498 * filesystem supports at least 32 bits.
6496 6499 * Truncate nsec to usec resolution to allow valid
6497 6500 * compares even if the underlying filesystem truncates.
6498 6501 */
6499 6502 mtime = (timespec32_t *)&args->createhow4_u.createverf;
6500 6503 cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6501 6504 cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6502 6505 cva.va_mode = (mode_t)0;
6503 6506 vap = &cva;
6504 6507
6505 6508 /*
6506 6509 * For EXCL create, attrset is set to the server attr
6507 6510 * used to cache the client's verifier.
6508 6511 */
6509 6512 *attrset = FATTR4_TIME_MODIFY_MASK;
6510 6513 break;
6511 6514 }
6512 6515
6513 6516 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6514 6517 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6515 6518 MAXPATHLEN + 1);
6516 6519
6517 6520 if (name == NULL) {
6518 6521 kmem_free(nm, buflen);
6519 6522 return (NFS4ERR_SERVERFAULT);
6520 6523 }
6521 6524
6522 6525 status = create_vnode(dvp, name, vap, args->mode,
6523 6526 cs->cr, &vp, &created);
6524 6527 if (nm != name)
6525 6528 kmem_free(name, MAXPATHLEN + 1);
6526 6529 kmem_free(nm, buflen);
6527 6530
6528 6531 if (status != NFS4_OK) {
6529 6532 if (ntov_table_init)
6530 6533 nfs4_ntov_table_free(&ntov, &sarg);
6531 6534 *attrset = 0;
6532 6535 return (status);
6533 6536 }
6534 6537
6535 6538 trunc = (setsize && !created);
6536 6539
6537 6540 if (args->mode != EXCLUSIVE4) {
6538 6541 bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6539 6542
6540 6543 /*
6541 6544 * True verification that object was created with correct
6542 6545 * attrs is impossible. The attrs could have been changed
6543 6546 * immediately after object creation. If attributes did
6544 6547 * not verify, the only recourse for the server is to
6545 6548 * destroy the object. Maybe if some attrs (like gid)
6546 6549 * are set incorrectly, the object should be destroyed;
6547 6550 * however, seems bad as a default policy. Do we really
6548 6551 * want to destroy an object over one of the times not
6549 6552 * verifying correctly? For these reasons, the server
6550 6553 * currently sets bits in attrset for createattrs
6551 6554 * that were set; however, no verification is done.
6552 6555 *
6553 6556 * vmask_to_nmask accounts for vattr bits set on create
6554 6557 * [do_rfs4_set_attrs() only sets resp bits for
6555 6558 * non-vattr/vfs bits.]
6556 6559 * Mask off any bits we set by default so as not to return
6557 6560 * more attrset bits than were requested in createattrs
6558 6561 */
6559 6562 if (created) {
6560 6563 nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6561 6564 *attrset &= createmask;
6562 6565 } else {
6563 6566 /*
6564 6567 * We did not create the vnode (we tried but it
6565 6568 * already existed). In this case, the only createattr
6566 6569 * that the spec allows the server to set is size,
6567 6570 * and even then, it can only be set if it is 0.
6568 6571 */
6569 6572 *attrset = 0;
6570 6573 if (trunc)
6571 6574 *attrset = FATTR4_SIZE_MASK;
6572 6575 }
6573 6576 }
6574 6577 if (ntov_table_init)
6575 6578 nfs4_ntov_table_free(&ntov, &sarg);
6576 6579
6577 6580 /*
6578 6581 * Get the initial "after" sequence number, if it fails,
6579 6582 * set to zero, time to before.
6580 6583 */
6581 6584 iva.va_mask = AT_CTIME|AT_SEQ;
6582 6585 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6583 6586 iva.va_seq = 0;
6584 6587 iva.va_ctime = bva.va_ctime;
6585 6588 }
6586 6589
6587 6590 /*
6588 6591 * create_vnode attempts to create the file exclusive,
6589 6592 * if it already exists the VOP_CREATE will fail and
6590 6593 * may not increase va_seq. It is atomic if
6591 6594 * we haven't changed the directory, but if it has changed
6592 6595 * we don't know what changed it.
6593 6596 */
6594 6597 if (!created) {
6595 6598 if (bva.va_seq && iva.va_seq &&
6596 6599 bva.va_seq == iva.va_seq)
6597 6600 cinfo->atomic = TRUE;
6598 6601 else
6599 6602 cinfo->atomic = FALSE;
6600 6603 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6601 6604 } else {
6602 6605 /*
6603 6606 * The entry was created, we need to sync the
6604 6607 * directory metadata.
6605 6608 */
6606 6609 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6607 6610
6608 6611 /*
6609 6612 * Get "after" change value, if it fails, simply return the
6610 6613 * before value.
6611 6614 */
6612 6615 ava.va_mask = AT_CTIME|AT_SEQ;
6613 6616 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6614 6617 ava.va_ctime = bva.va_ctime;
6615 6618 ava.va_seq = 0;
6616 6619 }
6617 6620
6618 6621 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6619 6622
6620 6623 /*
6621 6624 * The cinfo->atomic = TRUE only if we have
6622 6625 * non-zero va_seq's, and it has incremented by exactly one
6623 6626 * during the create_vnode and it didn't
6624 6627 * change during the VOP_FSYNC.
6625 6628 */
6626 6629 if (bva.va_seq && iva.va_seq && ava.va_seq &&
6627 6630 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6628 6631 cinfo->atomic = TRUE;
6629 6632 else
6630 6633 cinfo->atomic = FALSE;
6631 6634 }
6632 6635
6633 6636 /* Check for mandatory locking and that the size gets set. */
6634 6637 cva.va_mask = AT_MODE;
6635 6638 if (setsize)
6636 6639 cva.va_mask |= AT_SIZE;
6637 6640
6638 6641 /* Assume the worst */
6639 6642 cs->mandlock = TRUE;
6640 6643
6641 6644 if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6642 6645 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6643 6646
6644 6647 /*
6645 6648 * Truncate the file if necessary; this would be
6646 6649 * the case for create over an existing file.
6647 6650 */
6648 6651
|
↓ open down ↓ |
791 lines elided |
↑ open up ↑ |
6649 6652 if (trunc) {
6650 6653 int in_crit = 0;
6651 6654 rfs4_file_t *fp;
6652 6655 nfs4_srv_t *nsrv4;
6653 6656 bool_t create = FALSE;
6654 6657
6655 6658 /*
6656 6659 * We are writing over an existing file.
6657 6660 * Check to see if we need to recall a delegation.
6658 6661 */
6659 - nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
6662 + nsrv4 = nfs4_get_srv();
6660 6663 rfs4_hold_deleg_policy(nsrv4);
6661 6664 if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6662 6665 if (rfs4_check_delegated_byfp(FWRITE, fp,
6663 6666 (reqsize == 0), FALSE, FALSE, &clientid)) {
6664 6667 rfs4_file_rele(fp);
6665 6668 rfs4_rele_deleg_policy(nsrv4);
6666 6669 VN_RELE(vp);
6667 6670 *attrset = 0;
6668 6671 return (NFS4ERR_DELAY);
6669 6672 }
6670 6673 rfs4_file_rele(fp);
6671 6674 }
6672 6675 rfs4_rele_deleg_policy(nsrv4);
6673 6676
6674 6677 if (nbl_need_check(vp)) {
6675 6678 in_crit = 1;
6676 6679
6677 6680 ASSERT(reqsize == 0);
6678 6681
6679 6682 nbl_start_crit(vp, RW_READER);
6680 6683 if (nbl_conflict(vp, NBL_WRITE, 0,
6681 6684 cva.va_size, 0, NULL)) {
6682 6685 in_crit = 0;
6683 6686 nbl_end_crit(vp);
6684 6687 VN_RELE(vp);
6685 6688 *attrset = 0;
6686 6689 return (NFS4ERR_ACCESS);
6687 6690 }
6688 6691 }
6689 6692 ct.cc_sysid = 0;
6690 6693 ct.cc_pid = 0;
6691 6694 ct.cc_caller_id = nfs4_srv_caller_id;
6692 6695 ct.cc_flags = CC_DONTBLOCK;
6693 6696
6694 6697 cva.va_mask = AT_SIZE;
6695 6698 cva.va_size = reqsize;
6696 6699 (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6697 6700 if (in_crit)
6698 6701 nbl_end_crit(vp);
6699 6702 }
6700 6703 }
6701 6704
6702 6705 error = makefh4(&cs->fh, vp, cs->exi);
6703 6706
6704 6707 /*
6705 6708 * Force modified data and metadata out to stable storage.
6706 6709 */
6707 6710 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6708 6711
6709 6712 if (error) {
6710 6713 VN_RELE(vp);
6711 6714 *attrset = 0;
6712 6715 return (puterrno4(error));
6713 6716 }
6714 6717
6715 6718 /* if parent dir is attrdir, set namedattr fh flag */
6716 6719 if (dvp->v_flag & V_XATTRDIR)
6717 6720 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6718 6721
6719 6722 if (cs->vp)
6720 6723 VN_RELE(cs->vp);
6721 6724
6722 6725 cs->vp = vp;
6723 6726
6724 6727 /*
6725 6728 * if we did not create the file, we will need to check
6726 6729 * the access bits on the file
6727 6730 */
6728 6731
6729 6732 if (!created) {
6730 6733 if (setsize)
6731 6734 args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6732 6735 status = check_open_access(args->share_access, cs, req);
6733 6736 if (status != NFS4_OK)
6734 6737 *attrset = 0;
6735 6738 }
6736 6739 return (status);
6737 6740 }
6738 6741
/*
 * rfs4_do_open: common back end for the OPEN claim handlers.
 *
 * Using the file already referenced by cs->vp / cs->fh, this routine
 * looks up (or creates) the rfs4_file_t for the vnode and the
 * rfs4_state_t for the (oo, file) pair, then:
 *   - applies any newly requested share access/deny bits via
 *     rfs4_share();
 *   - checks for a conflicting delegation with rfs4_check_recall(); on
 *     a conflict it calls rfs4_recall_deleg(), delays, and re-checks,
 *     answering NFS4ERR_DELAY if the conflict persists or
 *     NFS4ERR_OLD_STATEID if the state was closed while its lock was
 *     dropped;
 *   - calls VOP_OPEN() for the first open through this state (unless
 *     deleg_cur is set), otherwise vn_open_upgrade();
 *   - updates the access/deny bookkeeping on the state and the file,
 *     asks rfs4_grant_delegation() for a delegation, advances the
 *     stateid and copies it into resp.
 *
 * On every failure path after the state has been found, a state that
 * was created by this call (screate == TRUE) is closed again before the
 * references are released.  The outcome is reported only through
 * resp->status; *cs->statusp is not written here.
 */
6739 6742 /*ARGSUSED*/
6740 6743 static void
6741 6744 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
6742 6745 rfs4_openowner_t *oo, delegreq_t deleg,
6743 6746 uint32_t access, uint32_t deny,
6744 6747 OPEN4res *resp, int deleg_cur)
6745 6748 {
6746 6749 /* XXX Currently not using req */
6747 6750 rfs4_state_t *sp;
6748 6751 rfs4_file_t *fp;
6749 6752 bool_t screate = TRUE;
6750 6753 bool_t fcreate = TRUE;
6751 6754 uint32_t open_a, share_a;
6752 6755 uint32_t open_d, share_d;
6753 6756 rfs4_deleg_state_t *dsp;
6754 6757 sysid_t sysid;
6755 6758 nfsstat4 status;
6756 6759 caller_context_t ct;
6757 6760 int fflags = 0;
6758 6761 int recall = 0;
6759 6762 int err;
6760 6763 int first_open;
6761 6764
6762 6765 /* get the file struct and hold a lock on it during initial open */
6763 6766 fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
6764 6767 if (fp == NULL) {
6765 6768 resp->status = NFS4ERR_RESOURCE;
6766 6769 DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
6767 6770 return;
6768 6771 }
6769 6772
6770 6773 sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
6771 6774 if (sp == NULL) {
6772 6775 resp->status = NFS4ERR_RESOURCE;
6773 6776 DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
6774 6777 /* No need to keep any reference */
6775 6778 rw_exit(&fp->rf_file_rwlock);
6776 6779 rfs4_file_rele(fp);
6777 6780 return;
6778 6781 }
6779 6782
/*
 * NOTE(review): rf_file_rwlock is only released explicitly in this
 * function on the sp == NULL path above; presumably it is dropped
 * elsewhere once the state lookup succeeds -- confirm.
 */
6780 6783 /* try to get the sysid before continuing */
6781 6784 if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
6782 6785 resp->status = status;
6783 6786 rfs4_file_rele(fp);
6784 6787 /* Not a fully formed open; "close" it */
6785 6788 if (screate == TRUE)
6786 6789 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6787 6790 rfs4_state_rele(sp);
6788 6791 return;
6789 6792 }
6790 6793
6791 6794 /* Calculate the fflags for this OPEN. */
6792 6795 if (access & OPEN4_SHARE_ACCESS_READ)
6793 6796 fflags |= FREAD;
6794 6797 if (access & OPEN4_SHARE_ACCESS_WRITE)
6795 6798 fflags |= FWRITE;
6796 6799
6797 6800 rfs4_dbe_lock(sp->rs_dbe);
6798 6801
6799 6802 /*
6800 6803 * Calculate the new deny and access mode that this open is adding to
6801 6804 * the file for this open owner;
6802 6805 */
6803 6806 open_d = (deny & ~sp->rs_open_deny);
6804 6807 open_a = (access & ~sp->rs_open_access);
6805 6808
6806 6809 /*
6807 6810 * Calculate the new share access and share deny modes that this open
6808 6811 * is adding to the file for this open owner;
6809 6812 */
6810 6813 share_a = (access & ~sp->rs_share_access);
6811 6814 share_d = (deny & ~sp->rs_share_deny);
6812 6815
/* No access bits recorded on this state yet: treat this as a new open. */
6813 6816 first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
6814 6817
6815 6818 /*
6816 6819 * Check to see the client has already sent an open for this
6817 6820 * open owner on this file with the same share/deny modes.
6818 6821 * If so, we don't need to check for a conflict and we don't
6819 6822 * need to add another shrlock. If not, then we need to
6820 6823 * check for conflicts in deny and access before checking for
6821 6824 * conflicts in delegation. We don't want to recall a
6822 6825 * delegation based on an open that will eventually fail based
6823 6826 * on shares modes.
6824 6827 */
6825 6828
6826 6829 if (share_a || share_d) {
6827 6830 if ((err = rfs4_share(sp, access, deny)) != 0) {
6828 6831 rfs4_dbe_unlock(sp->rs_dbe);
6829 6832 resp->status = err;
6830 6833
6831 6834 rfs4_file_rele(fp);
6832 6835 /* Not a fully formed open; "close" it */
6833 6836 if (screate == TRUE)
6834 6837 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6835 6838 rfs4_state_rele(sp);
6836 6839 return;
6837 6840 }
6838 6841 }
6839 6842
6840 6843 rfs4_dbe_lock(fp->rf_dbe);
6841 6844
6842 6845 /*
6843 6846 * Check to see if this file is delegated and if so, if a
6844 6847 * recall needs to be done.
6845 6848 */
6846 6849 if (rfs4_check_recall(sp, access)) {
/* Both dbe locks are dropped across the recall and the delay. */
6847 6850 rfs4_dbe_unlock(fp->rf_dbe);
6848 6851 rfs4_dbe_unlock(sp->rs_dbe);
6849 6852 rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
6850 6853 delay(NFS4_DELEGATION_CONFLICT_DELAY);
6851 6854 rfs4_dbe_lock(sp->rs_dbe);
6852 6855
6853 6856 /* if state closed while lock was dropped */
6854 6857 if (sp->rs_closed) {
6855 6858 if (share_a || share_d)
6856 6859 (void) rfs4_unshare(sp);
6857 6860 rfs4_dbe_unlock(sp->rs_dbe);
6858 6861 rfs4_file_rele(fp);
6859 6862 /* Not a fully formed open; "close" it */
6860 6863 if (screate == TRUE)
6861 6864 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6862 6865 rfs4_state_rele(sp);
6863 6866 resp->status = NFS4ERR_OLD_STATEID;
6864 6867 return;
6865 6868 }
6866 6869
6867 6870 rfs4_dbe_lock(fp->rf_dbe);
6868 6871 /* Let's see if the delegation was returned */
6869 6872 if (rfs4_check_recall(sp, access)) {
6870 6873 rfs4_dbe_unlock(fp->rf_dbe);
6871 6874 if (share_a || share_d)
6872 6875 (void) rfs4_unshare(sp);
6873 6876 rfs4_dbe_unlock(sp->rs_dbe);
6874 6877 rfs4_file_rele(fp);
6875 6878 rfs4_update_lease(sp->rs_owner->ro_client);
6876 6879
6877 6880 /* Not a fully formed open; "close" it */
6878 6881 if (screate == TRUE)
6879 6882 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6880 6883 rfs4_state_rele(sp);
6881 6884 resp->status = NFS4ERR_DELAY;
6882 6885 return;
6883 6886 }
6884 6887 }
6885 6888 /*
6886 6889 * the share check passed and any delegation conflict has been
6887 6890 * taken care of, now call vop_open.
6888 6891 * if this is the first open then call vop_open with fflags.
6889 6892 * if not, call vn_open_upgrade with just the upgrade flags.
6890 6893 *
6891 6894 * if the file has been opened already, it will have the current
6892 6895 * access mode in the state struct. if it has no share access, then
6893 6896 * this is a new open.
6894 6897 *
6895 6898 * However, if this is open with CLAIM_DELEGATE_CUR, then don't
6896 6899 * call VOP_OPEN(), just do the open upgrade.
6897 6900 */
6898 6901 if (first_open && !deleg_cur) {
6899 6902 ct.cc_sysid = sysid;
6900 6903 ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
6901 6904 ct.cc_caller_id = nfs4_srv_caller_id;
6902 6905 ct.cc_flags = CC_DONTBLOCK;
6903 6906 err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
6904 6907 if (err) {
6905 6908 rfs4_dbe_unlock(fp->rf_dbe);
6906 6909 if (share_a || share_d)
6907 6910 (void) rfs4_unshare(sp);
6908 6911 rfs4_dbe_unlock(sp->rs_dbe);
6909 6912 rfs4_file_rele(fp);
6910 6913
6911 6914 /* Not a fully formed open; "close" it */
6912 6915 if (screate == TRUE)
6913 6916 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6914 6917 rfs4_state_rele(sp);
6915 6918 /* check if a monitor detected a delegation conflict */
6916 6919 if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
6917 6920 resp->status = NFS4ERR_DELAY;
6918 6921 else
6919 6922 resp->status = NFS4ERR_SERVERFAULT;
6920 6923 return;
6921 6924 }
6922 6925 } else { /* open upgrade */
6923 6926 /*
6924 6927 * calculate the fflags for the new mode that is being added
6925 6928 * by this upgrade.
6926 6929 */
6927 6930 fflags = 0;
6928 6931 if (open_a & OPEN4_SHARE_ACCESS_READ)
6929 6932 fflags |= FREAD;
6930 6933 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6931 6934 fflags |= FWRITE;
6932 6935 vn_open_upgrade(cs->vp, fflags);
6933 6936 }
6934 6937 sp->rs_open_access |= access;
6935 6938 sp->rs_open_deny |= deny;
6936 6939
/*
 * The per-file counters are bumped only for bits that are new to this
 * state (open_d / open_a); the per-file masks take the full request.
 */
6937 6940 if (open_d & OPEN4_SHARE_DENY_READ)
6938 6941 fp->rf_deny_read++;
6939 6942 if (open_d & OPEN4_SHARE_DENY_WRITE)
6940 6943 fp->rf_deny_write++;
6941 6944 fp->rf_share_deny |= deny;
6942 6945
6943 6946 if (open_a & OPEN4_SHARE_ACCESS_READ)
6944 6947 fp->rf_access_read++;
6945 6948 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6946 6949 fp->rf_access_write++;
6947 6950 fp->rf_share_access |= access;
6948 6951
6949 6952 /*
6950 6953 * Check for delegation here. if the deleg argument is not
6951 6954 * DELEG_ANY, then this is a reclaim from a client and
6952 6955 * we must honor the delegation requested. If necessary we can
6953 6956 * set the recall flag.
6954 6957 */
6955 6958
6956 6959 dsp = rfs4_grant_delegation(deleg, sp, &recall);
6957 6960
6958 6961 cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
6959 6962
6960 6963 next_stateid(&sp->rs_stateid);
6961 6964
6962 6965 resp->stateid = sp->rs_stateid.stateid;
6963 6966
6964 6967 rfs4_dbe_unlock(fp->rf_dbe);
6965 6968 rfs4_dbe_unlock(sp->rs_dbe);
6966 6969
/* A delegation was granted: describe it in the reply, then drop our hold. */
6967 6970 if (dsp) {
6968 6971 rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
6969 6972 rfs4_deleg_state_rele(dsp);
6970 6973 }
6971 6974
6972 6975 rfs4_file_rele(fp);
6973 6976 rfs4_state_rele(sp);
6974 6977
6975 6978 resp->status = NFS4_OK;
6976 6979 }
6977 6980
6978 6981 /*ARGSUSED*/
6979 6982 static void
6980 6983 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6981 6984 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6982 6985 {
6983 6986 change_info4 *cinfo = &resp->cinfo;
6984 6987 bitmap4 *attrset = &resp->attrset;
6985 6988
6986 6989 if (args->opentype == OPEN4_NOCREATE)
6987 6990 resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
6988 6991 req, cs, args->share_access, cinfo);
6989 6992 else {
6990 6993 /* inhibit delegation grants during exclusive create */
6991 6994
6992 6995 if (args->mode == EXCLUSIVE4)
6993 6996 rfs4_disable_delegation();
6994 6997
6995 6998 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
6996 6999 oo->ro_client->rc_clientid);
6997 7000 }
6998 7001
6999 7002 if (resp->status == NFS4_OK) {
7000 7003
7001 7004 /* cs->vp cs->fh now reference the desired file */
7002 7005
7003 7006 rfs4_do_open(cs, req, oo,
7004 7007 oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
7005 7008 args->share_access, args->share_deny, resp, 0);
7006 7009
7007 7010 /*
7008 7011 * If rfs4_createfile set attrset, we must
7009 7012 * clear this attrset before the response is copied.
7010 7013 */
7011 7014 if (resp->status != NFS4_OK && resp->attrset) {
7012 7015 resp->attrset = 0;
7013 7016 }
7014 7017 }
7015 7018 else
7016 7019 *cs->statusp = resp->status;
7017 7020
7018 7021 if (args->mode == EXCLUSIVE4)
7019 7022 rfs4_enable_delegation();
7020 7023 }
7021 7024
7022 7025 /*ARGSUSED*/
7023 7026 static void
7024 7027 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
7025 7028 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7026 7029 {
7027 7030 change_info4 *cinfo = &resp->cinfo;
7028 7031 vattr_t va;
7029 7032 vtype_t v_type = cs->vp->v_type;
7030 7033 int error = 0;
7031 7034
7032 7035 /* Verify that we have a regular file */
7033 7036 if (v_type != VREG) {
7034 7037 if (v_type == VDIR)
7035 7038 resp->status = NFS4ERR_ISDIR;
7036 7039 else if (v_type == VLNK)
7037 7040 resp->status = NFS4ERR_SYMLINK;
7038 7041 else
7039 7042 resp->status = NFS4ERR_INVAL;
7040 7043 return;
7041 7044 }
7042 7045
7043 7046 va.va_mask = AT_MODE|AT_UID;
7044 7047 error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
7045 7048 if (error) {
7046 7049 resp->status = puterrno4(error);
7047 7050 return;
7048 7051 }
7049 7052
7050 7053 cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7051 7054
7052 7055 /*
7053 7056 * Check if we have access to the file, Note the the file
7054 7057 * could have originally been open UNCHECKED or GUARDED
7055 7058 * with mode bits that will now fail, but there is nothing
7056 7059 * we can really do about that except in the case that the
7057 7060 * owner of the file is the one requesting the open.
7058 7061 */
7059 7062 if (crgetuid(cs->cr) != va.va_uid) {
7060 7063 resp->status = check_open_access(args->share_access, cs, req);
7061 7064 if (resp->status != NFS4_OK) {
7062 7065 return;
7063 7066 }
7064 7067 }
7065 7068
7066 7069 /*
7067 7070 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7068 7071 */
7069 7072 cinfo->before = 0;
7070 7073 cinfo->after = 0;
7071 7074 cinfo->atomic = FALSE;
7072 7075
7073 7076 rfs4_do_open(cs, req, oo,
7074 7077 NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
7075 7078 args->share_access, args->share_deny, resp, 0);
7076 7079 }
7077 7080
7078 7081 static void
7079 7082 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7080 7083 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7081 7084 {
7082 7085 int error;
7083 7086 nfsstat4 status;
7084 7087 stateid4 stateid =
7085 7088 args->open_claim4_u.delegate_cur_info.delegate_stateid;
7086 7089 rfs4_deleg_state_t *dsp;
7087 7090
7088 7091 /*
7089 7092 * Find the state info from the stateid and confirm that the
7090 7093 * file is delegated. If the state openowner is the same as
7091 7094 * the supplied openowner we're done. If not, get the file
7092 7095 * info from the found state info. Use that file info to
7093 7096 * create the state for this lock owner. Note solaris doen't
7094 7097 * really need the pathname to find the file. We may want to
7095 7098 * lookup the pathname and make sure that the vp exist and
7096 7099 * matches the vp in the file structure. However it is
7097 7100 * possible that the pathname nolonger exists (local process
7098 7101 * unlinks the file), so this may not be that useful.
7099 7102 */
7100 7103
7101 7104 status = rfs4_get_deleg_state(&stateid, &dsp);
7102 7105 if (status != NFS4_OK) {
7103 7106 resp->status = status;
7104 7107 return;
7105 7108 }
7106 7109
7107 7110 ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7108 7111
7109 7112 /*
7110 7113 * New lock owner, create state. Since this was probably called
7111 7114 * in response to a CB_RECALL we set deleg to DELEG_NONE
7112 7115 */
7113 7116
7114 7117 ASSERT(cs->vp != NULL);
7115 7118 VN_RELE(cs->vp);
7116 7119 VN_HOLD(dsp->rds_finfo->rf_vp);
7117 7120 cs->vp = dsp->rds_finfo->rf_vp;
7118 7121
7119 7122 if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
7120 7123 rfs4_deleg_state_rele(dsp);
7121 7124 *cs->statusp = resp->status = puterrno4(error);
7122 7125 return;
7123 7126 }
7124 7127
7125 7128 /* Mark progress for delegation returns */
7126 7129 dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7127 7130 rfs4_deleg_state_rele(dsp);
7128 7131 rfs4_do_open(cs, req, oo, DELEG_NONE,
7129 7132 args->share_access, args->share_deny, resp, 1);
7130 7133 }
7131 7134
7132 7135 /*ARGSUSED*/
7133 7136 static void
7134 7137 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7135 7138 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7136 7139 {
7137 7140 /*
7138 7141 * Lookup the pathname, it must already exist since this file
7139 7142 * was delegated.
7140 7143 *
7141 7144 * Find the file and state info for this vp and open owner pair.
7142 7145 * check that they are in fact delegated.
7143 7146 * check that the state access and deny modes are the same.
7144 7147 *
7145 7148 * Return the delgation possibly seting the recall flag.
7146 7149 */
7147 7150 rfs4_file_t *fp;
7148 7151 rfs4_state_t *sp;
7149 7152 bool_t create = FALSE;
7150 7153 bool_t dcreate = FALSE;
7151 7154 rfs4_deleg_state_t *dsp;
7152 7155 nfsace4 *ace;
7153 7156
7154 7157 /* Note we ignore oflags */
7155 7158 resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
7156 7159 req, cs, args->share_access, &resp->cinfo);
7157 7160
7158 7161 if (resp->status != NFS4_OK) {
7159 7162 return;
7160 7163 }
7161 7164
7162 7165 /* get the file struct and hold a lock on it during initial open */
7163 7166 fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7164 7167 if (fp == NULL) {
7165 7168 resp->status = NFS4ERR_RESOURCE;
7166 7169 DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7167 7170 return;
7168 7171 }
7169 7172
7170 7173 sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7171 7174 if (sp == NULL) {
7172 7175 resp->status = NFS4ERR_SERVERFAULT;
7173 7176 DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7174 7177 rw_exit(&fp->rf_file_rwlock);
7175 7178 rfs4_file_rele(fp);
7176 7179 return;
7177 7180 }
7178 7181
7179 7182 rfs4_dbe_lock(sp->rs_dbe);
7180 7183 rfs4_dbe_lock(fp->rf_dbe);
7181 7184 if (args->share_access != sp->rs_share_access ||
7182 7185 args->share_deny != sp->rs_share_deny ||
7183 7186 sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7184 7187 NFS4_DEBUG(rfs4_debug,
7185 7188 (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7186 7189 rfs4_dbe_unlock(fp->rf_dbe);
7187 7190 rfs4_dbe_unlock(sp->rs_dbe);
7188 7191 rfs4_file_rele(fp);
7189 7192 rfs4_state_rele(sp);
7190 7193 resp->status = NFS4ERR_SERVERFAULT;
7191 7194 return;
7192 7195 }
7193 7196 rfs4_dbe_unlock(fp->rf_dbe);
7194 7197 rfs4_dbe_unlock(sp->rs_dbe);
7195 7198
7196 7199 dsp = rfs4_finddeleg(sp, &dcreate);
7197 7200 if (dsp == NULL) {
7198 7201 rfs4_state_rele(sp);
7199 7202 rfs4_file_rele(fp);
7200 7203 resp->status = NFS4ERR_SERVERFAULT;
7201 7204 return;
7202 7205 }
7203 7206
7204 7207 next_stateid(&sp->rs_stateid);
7205 7208
7206 7209 resp->stateid = sp->rs_stateid.stateid;
7207 7210
7208 7211 resp->delegation.delegation_type = dsp->rds_dtype;
7209 7212
7210 7213 if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7211 7214 open_read_delegation4 *rv =
7212 7215 &resp->delegation.open_delegation4_u.read;
7213 7216
7214 7217 rv->stateid = dsp->rds_delegid.stateid;
7215 7218 rv->recall = FALSE; /* no policy in place to set to TRUE */
7216 7219 ace = &rv->permissions;
7217 7220 } else {
7218 7221 open_write_delegation4 *rv =
7219 7222 &resp->delegation.open_delegation4_u.write;
7220 7223
7221 7224 rv->stateid = dsp->rds_delegid.stateid;
7222 7225 rv->recall = FALSE; /* no policy in place to set to TRUE */
7223 7226 ace = &rv->permissions;
7224 7227 rv->space_limit.limitby = NFS_LIMIT_SIZE;
7225 7228 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7226 7229 }
7227 7230
7228 7231 /* XXX For now */
7229 7232 ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7230 7233 ace->flag = 0;
7231 7234 ace->access_mask = 0;
7232 7235 ace->who.utf8string_len = 0;
7233 7236 ace->who.utf8string_val = 0;
7234 7237
7235 7238 rfs4_deleg_state_rele(dsp);
7236 7239 rfs4_state_rele(sp);
7237 7240 rfs4_file_rele(fp);
7238 7241 }
7239 7242
/*
 * Result of comparing a request's sequence id with the one last recorded
 * for an owner, as produced by rfs4_check_seqid():
 *   NFS4_CHKSEQ_OKAY   - the request carries the next expected seqid
 *   NFS4_CHKSEQ_REPLAY - same seqid and same operation as the last reply
 *   NFS4_CHKSEQ_BAD    - any other seqid, or same seqid with another op
 */
7240 7243 typedef enum {
7241 7244 NFS4_CHKSEQ_OKAY = 0,
7242 7245 NFS4_CHKSEQ_REPLAY = 1,
7243 7246 NFS4_CHKSEQ_BAD = 2
7244 7247 } rfs4_chkseq_t;
7245 7248
7246 7249 /*
7247 7250 * Generic function for sequence number checks.
7248 7251 */
7249 7252 static rfs4_chkseq_t
7250 7253 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7251 7254 seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7252 7255 {
7253 7256 /* Same sequence ids and matching operations? */
7254 7257 if (seqid == rqst_seq && resop->resop == lastop->resop) {
7255 7258 if (copyres == TRUE) {
7256 7259 rfs4_free_reply(resop);
7257 7260 rfs4_copy_reply(resop, lastop);
7258 7261 }
7259 7262 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7260 7263 "Replayed SEQID %d\n", seqid));
7261 7264 return (NFS4_CHKSEQ_REPLAY);
7262 7265 }
7263 7266
7264 7267 /* If the incoming sequence is not the next expected then it is bad */
7265 7268 if (rqst_seq != seqid + 1) {
7266 7269 if (rqst_seq == seqid) {
7267 7270 NFS4_DEBUG(rfs4_debug,
7268 7271 (CE_NOTE, "BAD SEQID: Replayed sequence id "
7269 7272 "but last op was %d current op is %d\n",
7270 7273 lastop->resop, resop->resop));
7271 7274 return (NFS4_CHKSEQ_BAD);
7272 7275 }
7273 7276 NFS4_DEBUG(rfs4_debug,
7274 7277 (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7275 7278 rqst_seq, seqid));
7276 7279 return (NFS4_CHKSEQ_BAD);
7277 7280 }
7278 7281
7279 7282 /* Everything okay -- next expected */
7280 7283 return (NFS4_CHKSEQ_OKAY);
7281 7284 }
7282 7285
7283 7286
7284 7287 static rfs4_chkseq_t
7285 7288 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7286 7289 {
7287 7290 rfs4_chkseq_t rc;
7288 7291
7289 7292 rfs4_dbe_lock(op->ro_dbe);
7290 7293 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7291 7294 TRUE);
7292 7295 rfs4_dbe_unlock(op->ro_dbe);
7293 7296
7294 7297 if (rc == NFS4_CHKSEQ_OKAY)
7295 7298 rfs4_update_lease(op->ro_client);
7296 7299
7297 7300 return (rc);
7298 7301 }
7299 7302
7300 7303 static rfs4_chkseq_t
7301 7304 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7302 7305 {
7303 7306 rfs4_chkseq_t rc;
7304 7307
7305 7308 rfs4_dbe_lock(op->ro_dbe);
7306 7309 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7307 7310 olo_seqid, resop, FALSE);
7308 7311 rfs4_dbe_unlock(op->ro_dbe);
7309 7312
7310 7313 return (rc);
7311 7314 }
7312 7315
7313 7316 static rfs4_chkseq_t
7314 7317 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7315 7318 {
7316 7319 rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7317 7320
7318 7321 rfs4_dbe_lock(lsp->rls_dbe);
7319 7322 if (!lsp->rls_skip_seqid_check)
7320 7323 rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7321 7324 resop, TRUE);
7322 7325 rfs4_dbe_unlock(lsp->rls_dbe);
7323 7326
7324 7327 return (rc);
7325 7328 }
7326 7329
/*
 * rfs4_op_open: NFSv4 OPEN operation.
 *
 * Outline:
 *   1. Require a current filehandle, find the client named by the open
 *      owner's clientid and reject it if its lease has expired.
 *   2. Find or create the open owner and enter its ro_sw while the open
 *      is processed.  For an owner that already existed (and is not
 *      marked ro_postpone_confirm) the request seqid is checked; an
 *      owner that still needs confirmation has its opens freed and the
 *      lookup is retried.
 *   3. Apply the grace-period rules for the claim type, validate the
 *      share access/deny bits, and dispatch on the claim type to one of
 *      the rfs4_do_open*() helpers.
 *   4. At "out", decide from resp->status whether the reply is saved
 *      and the owner's sequence id advances; on a replay, try to restore
 *      the current filehandle/vnode from the saved reply filehandle.
 *
 * The final status is stored in both resp->status and *cs->statusp.
 */
7327 7330 static void
7328 7331 rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
7329 7332 struct svc_req *req, struct compound_state *cs)
7330 7333 {
7331 7334 OPEN4args *args = &argop->nfs_argop4_u.opopen;
7332 7335 OPEN4res *resp = &resop->nfs_resop4_u.opopen;
7333 7336 open_owner4 *owner = &args->owner;
7334 7337 open_claim_type4 claim = args->claim;
7335 7338 rfs4_client_t *cp;
7336 7339 rfs4_openowner_t *oo;
7337 7340 bool_t create;
7338 7341 bool_t replay = FALSE;
7339 7342 int can_reclaim;
7340 7343
7341 7344 DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
7342 7345 OPEN4args *, args);
7343 7346
7344 7347 if (cs->vp == NULL) {
7345 7348 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7346 7349 goto end;
7347 7350 }
7348 7351
7349 7352 /*
7350 7353 * Need to check clientid and lease expiration first based on
7351 7354 * error ordering and incrementing sequence id.
7352 7355 */
7353 7356 cp = rfs4_findclient_by_id(owner->clientid, FALSE);
7354 7357 if (cp == NULL) {
7355 7358 *cs->statusp = resp->status =
7356 7359 rfs4_check_clientid(&owner->clientid, 0);
7357 7360 goto end;
7358 7361 }
7359 7362
7360 7363 if (rfs4_lease_expired(cp)) {
7361 7364 rfs4_client_close(cp);
7362 7365 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7363 7366 goto end;
7364 7367 }
7365 7368 can_reclaim = cp->rc_can_reclaim;
7366 7369
7367 7370 /*
7368 7371 * Find the open_owner for use from this point forward. Take
7369 7372 * care in updating the sequence id based on the type of error
7370 7373 * being returned.
7371 7374 */
7372 7375 retry:
7373 7376 create = TRUE;
7374 7377 oo = rfs4_findopenowner(owner, &create, args->seqid);
7375 7378 if (oo == NULL) {
7376 7379 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
7377 7380 rfs4_client_rele(cp);
7378 7381 goto end;
7379 7382 }
7380 7383
7381 7384 /* Hold off access to the sequence space while the open is done */
7382 7385 rfs4_sw_enter(&oo->ro_sw);
7383 7386
7384 7387 /*
7385 7388 * If the open_owner existed before at the server, then check
7386 7389 * the sequence id.
7387 7390 */
7388 7391 if (!create && !oo->ro_postpone_confirm) {
7389 7392 switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
7390 7393 case NFS4_CHKSEQ_BAD:
/*
 * A newer seqid on an owner that was never confirmed: free its opens
 * and retry the owner lookup.
 */
7391 7394 if ((args->seqid > oo->ro_open_seqid) &&
7392 7395 oo->ro_need_confirm) {
7393 7396 rfs4_free_opens(oo, TRUE, FALSE);
7394 7397 rfs4_sw_exit(&oo->ro_sw);
7395 7398 rfs4_openowner_rele(oo);
7396 7399 goto retry;
7397 7400 }
7398 7401 resp->status = NFS4ERR_BAD_SEQID;
7399 7402 goto out;
7400 7403 case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
7401 7404 replay = TRUE;
7402 7405 goto out;
7403 7406 default:
7404 7407 break;
7405 7408 }
7406 7409
7407 7410 /*
7408 7411 * Sequence was ok and open owner exists
7409 7412 * check to see if we have yet to see an
7410 7413 * open_confirm.
7411 7414 */
7412 7415 if (oo->ro_need_confirm) {
7413 7416 rfs4_free_opens(oo, TRUE, FALSE);
7414 7417 rfs4_sw_exit(&oo->ro_sw);
7415 7418 rfs4_openowner_rele(oo);
7416 7419 goto retry;
7417 7420 }
7418 7421 }
7419 7422 /* Grace only applies to regular-type OPENs */
7420 7423 if (rfs4_clnt_in_grace(cp) &&
7421 7424 (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
7422 7425 *cs->statusp = resp->status = NFS4ERR_GRACE;
7423 7426 goto out;
7424 7427 }
7425 7428
7426 7429 /*
7427 7430 * If previous state at the server existed then can_reclaim
7428 7431 * will be set. If not reply NFS4ERR_NO_GRACE to the
7429 7432 * client.
7430 7433 */
7431 7434 if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
7432 7435 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7433 7436 goto out;
7434 7437 }
7435 7438
7436 7439
7437 7440 /*
7438 7441 * Reject the open if the client has missed the grace period
7439 7442 */
7440 7443 if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
7441 7444 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7442 7445 goto out;
7443 7446 }
7444 7447
7445 7448 /* Couple of up-front bookkeeping items */
7446 7449 if (oo->ro_need_confirm) {
7447 7450 /*
7448 7451 * If this is a reclaim OPEN then we should not ask
7449 7452 * for a confirmation of the open_owner per the
7450 7453 * protocol specification.
7451 7454 */
7452 7455 if (claim == CLAIM_PREVIOUS)
7453 7456 oo->ro_need_confirm = FALSE;
7454 7457 else
7455 7458 resp->rflags |= OPEN4_RESULT_CONFIRM;
7456 7459 }
7457 7460 resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;
7458 7461
7459 7462 /*
7460 7463 * If there is an unshared filesystem mounted on this vnode,
7461 7464 * do not allow to open/create in this directory.
7462 7465 */
7463 7466 if (vn_ismntpt(cs->vp)) {
7464 7467 *cs->statusp = resp->status = NFS4ERR_ACCESS;
7465 7468 goto out;
7466 7469 }
7467 7470
7468 7471 /*
7469 7472 * access must be READ, WRITE, or BOTH. No access is invalid.
7470 7473 * deny can be READ, WRITE, BOTH, or NONE.
7471 7474 * bits not defined for access/deny are invalid.
7472 7475 */
7473 7476 if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
7474 7477 (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
7475 7478 (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
7476 7479 *cs->statusp = resp->status = NFS4ERR_INVAL;
7477 7480 goto out;
7478 7481 }
7479 7482
7480 7483
7481 7484 /*
7482 7485 * make sure attrset is zero before response is built.
7483 7486 */
7484 7487 resp->attrset = 0;
7485 7488
7486 7489 switch (claim) {
7487 7490 case CLAIM_NULL:
7488 7491 rfs4_do_opennull(cs, req, args, oo, resp);
7489 7492 break;
7490 7493 case CLAIM_PREVIOUS:
7491 7494 rfs4_do_openprev(cs, req, args, oo, resp);
7492 7495 break;
7493 7496 case CLAIM_DELEGATE_CUR:
7494 7497 rfs4_do_opendelcur(cs, req, args, oo, resp);
7495 7498 break;
7496 7499 case CLAIM_DELEGATE_PREV:
7497 7500 rfs4_do_opendelprev(cs, req, args, oo, resp);
7498 7501 break;
7499 7502 default:
7500 7503 resp->status = NFS4ERR_INVAL;
7501 7504 break;
7502 7505 }
7503 7506
/*
 * Every jump to "out" happens with the open owner held and its ro_sw
 * entered; the client reference taken at the top is dropped first.
 */
7504 7507 out:
7505 7508 rfs4_client_rele(cp);
7506 7509
7507 7510 /* Catch sequence id handling here to make it a little easier */
7508 7511 switch (resp->status) {
7509 7512 case NFS4ERR_BADXDR:
7510 7513 case NFS4ERR_BAD_SEQID:
7511 7514 case NFS4ERR_BAD_STATEID:
7512 7515 case NFS4ERR_NOFILEHANDLE:
7513 7516 case NFS4ERR_RESOURCE:
7514 7517 case NFS4ERR_STALE_CLIENTID:
7515 7518 case NFS4ERR_STALE_STATEID:
7516 7519 /*
7517 7520 * The protocol states that if any of these errors are
7518 7521 * being returned, the sequence id should not be
7519 7522 * incremented. Any other return requires an
7520 7523 * increment.
7521 7524 */
7522 7525 break;
7523 7526 default:
7524 7527 /* Always update the lease in this case */
7525 7528 rfs4_update_lease(oo->ro_client);
7526 7529
7527 7530 /* Regular response - copy the result */
7528 7531 if (!replay)
7529 7532 rfs4_update_open_resp(oo, resop, &cs->fh);
7530 7533
7531 7534 /*
7532 7535 * REPLAY case: Only if the previous response was OK
7533 7536 * do we copy the filehandle. If not OK, no
7534 7537 * filehandle to copy.
7535 7538 */
7536 7539 if (replay == TRUE &&
7537 7540 resp->status == NFS4_OK &&
7538 7541 oo->ro_reply_fh.nfs_fh4_val) {
7539 7542 /*
7540 7543 * If this is a replay, we must restore the
7541 7544 * current filehandle/vp to that of what was
7542 7545 * returned originally. Try our best to do
7543 7546 * it.
7544 7547 */
7545 7548 nfs_fh4_fmt_t *fh_fmtp =
7546 7549 (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;
7547 7550
7548 7551 cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
7549 7552 (fid_t *)&fh_fmtp->fh4_xlen, NULL);
7550 7553
7551 7554 if (cs->exi == NULL) {
7552 7555 resp->status = NFS4ERR_STALE;
7553 7556 goto finish;
7554 7557 }
7555 7558
7556 7559 VN_RELE(cs->vp);
7557 7560
7558 7561 cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
7559 7562 &resp->status);
7560 7563
7561 7564 if (cs->vp == NULL)
7562 7565 goto finish;
7563 7566
7564 7567 nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
7565 7568 }
7566 7569
7567 7570 /*
7568 7571 * If this was a replay, no need to update the
7569 7572 * sequence id. If the open_owner was not created on
7570 7573 * this pass, then update. The first use of an
7571 7574 * open_owner will not bump the sequence id.
7572 7575 */
7573 7576 if (replay == FALSE && !create)
7574 7577 rfs4_update_open_sequence(oo);
7575 7578 /*
7576 7579 * If the client is receiving an error and the
7577 7580 * open_owner needs to be confirmed, there is no way
7578 7581 * to notify the client of this fact ignoring the fact
7579 7582 * that the server has no method of returning a
7580 7583 * stateid to confirm. Therefore, the server needs to
7581 7584 * mark this open_owner in a way as to avoid the
7582 7585 * sequence id checking the next time the client uses
7583 7586 * this open_owner.
7584 7587 */
7585 7588 if (resp->status != NFS4_OK && oo->ro_need_confirm)
7586 7589 oo->ro_postpone_confirm = TRUE;
7587 7590 /*
7588 7591 * If OK response then clear the postpone flag and
7589 7592 * reset the sequence id to keep in sync with the
7590 7593 * client.
7591 7594 */
7592 7595 if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
7593 7596 oo->ro_postpone_confirm = FALSE;
7594 7597 oo->ro_open_seqid = args->seqid;
7595 7598 }
7596 7599 break;
7597 7600 }
7598 7601
/* Publish the final status, then leave the owner's ro_sw and drop it. */
7599 7602 finish:
7600 7603 *cs->statusp = resp->status;
7601 7604
7602 7605 rfs4_sw_exit(&oo->ro_sw);
7603 7606 rfs4_openowner_rele(oo);
7604 7607
/* Paths that never obtained an open owner arrive here directly. */
7605 7608 end:
7606 7609 DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
7607 7610 OPEN4res *, resp);
7608 7611 }
7609 7612
7610 7613 /*ARGSUSED*/
7611 7614 void
7612 7615 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7613 7616 struct svc_req *req, struct compound_state *cs)
7614 7617 {
7615 7618 OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7616 7619 OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7617 7620 rfs4_state_t *sp;
7618 7621 nfsstat4 status;
7619 7622
7620 7623 DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7621 7624 OPEN_CONFIRM4args *, args);
7622 7625
7623 7626 if (cs->vp == NULL) {
7624 7627 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7625 7628 goto out;
7626 7629 }
7627 7630
7628 7631 if (cs->vp->v_type != VREG) {
7629 7632 *cs->statusp = resp->status =
7630 7633 cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7631 7634 return;
7632 7635 }
7633 7636
7634 7637 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7635 7638 if (status != NFS4_OK) {
7636 7639 *cs->statusp = resp->status = status;
7637 7640 goto out;
7638 7641 }
7639 7642
7640 7643 /* Ensure specified filehandle matches */
7641 7644 if (cs->vp != sp->rs_finfo->rf_vp) {
7642 7645 rfs4_state_rele(sp);
7643 7646 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7644 7647 goto out;
7645 7648 }
7646 7649
7647 7650 /* hold off other access to open_owner while we tinker */
7648 7651 rfs4_sw_enter(&sp->rs_owner->ro_sw);
7649 7652
7650 7653 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7651 7654 case NFS4_CHECK_STATEID_OKAY:
7652 7655 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7653 7656 resop) != 0) {
7654 7657 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7655 7658 break;
7656 7659 }
7657 7660 /*
7658 7661 * If it is the appropriate stateid and determined to
7659 7662 * be "OKAY" then this means that the stateid does not
7660 7663 * need to be confirmed and the client is in error for
7661 7664 * sending an OPEN_CONFIRM.
7662 7665 */
7663 7666 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7664 7667 break;
7665 7668 case NFS4_CHECK_STATEID_OLD:
7666 7669 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7667 7670 break;
7668 7671 case NFS4_CHECK_STATEID_BAD:
7669 7672 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7670 7673 break;
7671 7674 case NFS4_CHECK_STATEID_EXPIRED:
7672 7675 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7673 7676 break;
7674 7677 case NFS4_CHECK_STATEID_CLOSED:
7675 7678 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7676 7679 break;
7677 7680 case NFS4_CHECK_STATEID_REPLAY:
7678 7681 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7679 7682 resop)) {
7680 7683 case NFS4_CHKSEQ_OKAY:
7681 7684 /*
7682 7685 * This is replayed stateid; if seqid matches
7683 7686 * next expected, then client is using wrong seqid.
7684 7687 */
7685 7688 /* fall through */
7686 7689 case NFS4_CHKSEQ_BAD:
7687 7690 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7688 7691 break;
7689 7692 case NFS4_CHKSEQ_REPLAY:
7690 7693 /*
7691 7694 * Note this case is the duplicate case so
7692 7695 * resp->status is already set.
7693 7696 */
7694 7697 *cs->statusp = resp->status;
7695 7698 rfs4_update_lease(sp->rs_owner->ro_client);
7696 7699 break;
7697 7700 }
7698 7701 break;
7699 7702 case NFS4_CHECK_STATEID_UNCONFIRMED:
7700 7703 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7701 7704 resop) != NFS4_CHKSEQ_OKAY) {
7702 7705 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7703 7706 break;
7704 7707 }
7705 7708 *cs->statusp = resp->status = NFS4_OK;
7706 7709
7707 7710 next_stateid(&sp->rs_stateid);
7708 7711 resp->open_stateid = sp->rs_stateid.stateid;
7709 7712 sp->rs_owner->ro_need_confirm = FALSE;
7710 7713 rfs4_update_lease(sp->rs_owner->ro_client);
7711 7714 rfs4_update_open_sequence(sp->rs_owner);
7712 7715 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7713 7716 break;
7714 7717 default:
7715 7718 ASSERT(FALSE);
7716 7719 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7717 7720 break;
7718 7721 }
7719 7722 rfs4_sw_exit(&sp->rs_owner->ro_sw);
7720 7723 rfs4_state_rele(sp);
7721 7724
7722 7725 out:
7723 7726 DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7724 7727 OPEN_CONFIRM4res *, resp);
7725 7728 }
7726 7729
7727 7730 /*ARGSUSED*/
7728 7731 void
7729 7732 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7730 7733 struct svc_req *req, struct compound_state *cs)
7731 7734 {
7732 7735 OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7733 7736 OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7734 7737 uint32_t access = args->share_access;
7735 7738 uint32_t deny = args->share_deny;
7736 7739 nfsstat4 status;
7737 7740 rfs4_state_t *sp;
7738 7741 rfs4_file_t *fp;
7739 7742 int fflags = 0;
7740 7743
7741 7744 DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7742 7745 OPEN_DOWNGRADE4args *, args);
7743 7746
7744 7747 if (cs->vp == NULL) {
7745 7748 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7746 7749 goto out;
7747 7750 }
7748 7751
7749 7752 if (cs->vp->v_type != VREG) {
7750 7753 *cs->statusp = resp->status = NFS4ERR_INVAL;
7751 7754 return;
7752 7755 }
7753 7756
7754 7757 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7755 7758 if (status != NFS4_OK) {
7756 7759 *cs->statusp = resp->status = status;
7757 7760 goto out;
7758 7761 }
7759 7762
7760 7763 /* Ensure specified filehandle matches */
7761 7764 if (cs->vp != sp->rs_finfo->rf_vp) {
7762 7765 rfs4_state_rele(sp);
7763 7766 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7764 7767 goto out;
7765 7768 }
7766 7769
7767 7770 /* hold off other access to open_owner while we tinker */
7768 7771 rfs4_sw_enter(&sp->rs_owner->ro_sw);
7769 7772
7770 7773 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7771 7774 case NFS4_CHECK_STATEID_OKAY:
7772 7775 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7773 7776 resop) != NFS4_CHKSEQ_OKAY) {
7774 7777 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7775 7778 goto end;
7776 7779 }
7777 7780 break;
7778 7781 case NFS4_CHECK_STATEID_OLD:
7779 7782 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7780 7783 goto end;
7781 7784 case NFS4_CHECK_STATEID_BAD:
7782 7785 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7783 7786 goto end;
7784 7787 case NFS4_CHECK_STATEID_EXPIRED:
7785 7788 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7786 7789 goto end;
7787 7790 case NFS4_CHECK_STATEID_CLOSED:
7788 7791 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7789 7792 goto end;
7790 7793 case NFS4_CHECK_STATEID_UNCONFIRMED:
7791 7794 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7792 7795 goto end;
7793 7796 case NFS4_CHECK_STATEID_REPLAY:
7794 7797 /* Check the sequence id for the open owner */
7795 7798 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7796 7799 resop)) {
7797 7800 case NFS4_CHKSEQ_OKAY:
7798 7801 /*
7799 7802 * This is replayed stateid; if seqid matches
7800 7803 * next expected, then client is using wrong seqid.
7801 7804 */
7802 7805 /* fall through */
7803 7806 case NFS4_CHKSEQ_BAD:
7804 7807 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7805 7808 goto end;
7806 7809 case NFS4_CHKSEQ_REPLAY:
7807 7810 /*
7808 7811 * Note this case is the duplicate case so
7809 7812 * resp->status is already set.
7810 7813 */
7811 7814 *cs->statusp = resp->status;
7812 7815 rfs4_update_lease(sp->rs_owner->ro_client);
7813 7816 goto end;
7814 7817 }
7815 7818 break;
7816 7819 default:
7817 7820 ASSERT(FALSE);
7818 7821 break;
7819 7822 }
7820 7823
7821 7824 rfs4_dbe_lock(sp->rs_dbe);
7822 7825 /*
7823 7826 * Check that the new access modes and deny modes are valid.
7824 7827 * Check that no invalid bits are set.
7825 7828 */
7826 7829 if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
7827 7830 (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
7828 7831 *cs->statusp = resp->status = NFS4ERR_INVAL;
7829 7832 rfs4_update_open_sequence(sp->rs_owner);
7830 7833 rfs4_dbe_unlock(sp->rs_dbe);
7831 7834 goto end;
7832 7835 }
7833 7836
7834 7837 /*
7835 7838 * The new modes must be a subset of the current modes and
7836 7839 * the access must specify at least one mode. To test that
7837 7840 * the new mode is a subset of the current modes we bitwise
7838 7841 * AND them together and check that the result equals the new
7839 7842 * mode. For example:
7840 7843 * New mode, access == R and current mode, sp->rs_open_access == RW
7841 7844 * access & sp->rs_open_access == R == access, so the new access mode
7842 7845 * is valid. Consider access == RW, sp->rs_open_access = R
7843 7846 * access & sp->rs_open_access == R != access, so the new access mode
7844 7847 * is invalid.
7845 7848 */
7846 7849 if ((access & sp->rs_open_access) != access ||
7847 7850 (deny & sp->rs_open_deny) != deny ||
7848 7851 (access &
7849 7852 (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
7850 7853 *cs->statusp = resp->status = NFS4ERR_INVAL;
7851 7854 rfs4_update_open_sequence(sp->rs_owner);
7852 7855 rfs4_dbe_unlock(sp->rs_dbe);
7853 7856 goto end;
7854 7857 }
7855 7858
7856 7859 /*
7857 7860 * Release any share locks associated with this stateID.
7858 7861 * Strictly speaking, this violates the spec because the
7859 7862 * spec effectively requires that open downgrade be atomic.
7860 7863 * At present, fs_shrlock does not have this capability.
7861 7864 */
7862 7865 (void) rfs4_unshare(sp);
7863 7866
7864 7867 status = rfs4_share(sp, access, deny);
7865 7868 if (status != NFS4_OK) {
7866 7869 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7867 7870 rfs4_update_open_sequence(sp->rs_owner);
7868 7871 rfs4_dbe_unlock(sp->rs_dbe);
7869 7872 goto end;
7870 7873 }
7871 7874
7872 7875 fp = sp->rs_finfo;
7873 7876 rfs4_dbe_lock(fp->rf_dbe);
7874 7877
7875 7878 /*
7876 7879 * If the current mode has deny read and the new mode
7877 7880 * does not, decrement the number of deny read mode bits
7878 7881 * and if it goes to zero turn off the deny read bit
7879 7882 * on the file.
7880 7883 */
7881 7884 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
7882 7885 (deny & OPEN4_SHARE_DENY_READ) == 0) {
7883 7886 fp->rf_deny_read--;
7884 7887 if (fp->rf_deny_read == 0)
7885 7888 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
7886 7889 }
7887 7890
7888 7891 /*
7889 7892 * If the current mode has deny write and the new mode
7890 7893 * does not, decrement the number of deny write mode bits
7891 7894 * and if it goes to zero turn off the deny write bit
7892 7895 * on the file.
7893 7896 */
7894 7897 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
7895 7898 (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
7896 7899 fp->rf_deny_write--;
7897 7900 if (fp->rf_deny_write == 0)
7898 7901 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7899 7902 }
7900 7903
7901 7904 /*
7902 7905 * If the current mode has access read and the new mode
7903 7906 * does not, decrement the number of access read mode bits
7904 7907 * and if it goes to zero turn off the access read bit
7905 7908 * on the file. set fflags to FREAD for the call to
7906 7909 * vn_open_downgrade().
7907 7910 */
7908 7911 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
7909 7912 (access & OPEN4_SHARE_ACCESS_READ) == 0) {
7910 7913 fp->rf_access_read--;
7911 7914 if (fp->rf_access_read == 0)
7912 7915 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
7913 7916 fflags |= FREAD;
7914 7917 }
7915 7918
7916 7919 /*
7917 7920 * If the current mode has access write and the new mode
7918 7921 * does not, decrement the number of access write mode bits
7919 7922 * and if it goes to zero turn off the access write bit
7920 7923 * on the file. set fflags to FWRITE for the call to
7921 7924 * vn_open_downgrade().
7922 7925 */
7923 7926 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
7924 7927 (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7925 7928 fp->rf_access_write--;
7926 7929 if (fp->rf_access_write == 0)
7927 7930 fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
7928 7931 fflags |= FWRITE;
7929 7932 }
7930 7933
7931 7934 /* Check that the file is still accessible */
7932 7935 ASSERT(fp->rf_share_access);
7933 7936
7934 7937 rfs4_dbe_unlock(fp->rf_dbe);
7935 7938
7936 7939 /* now set the new open access and deny modes */
7937 7940 sp->rs_open_access = access;
7938 7941 sp->rs_open_deny = deny;
7939 7942
7940 7943 /*
7941 7944 * we successfully downgraded the share lock, now we need to downgrade
7942 7945 * the open. it is possible that the downgrade was only for a deny
7943 7946 * mode and we have nothing else to do.
7944 7947 */
7945 7948 if ((fflags & (FREAD|FWRITE)) != 0)
7946 7949 vn_open_downgrade(cs->vp, fflags);
7947 7950
7948 7951 /* Update the stateid */
7949 7952 next_stateid(&sp->rs_stateid);
7950 7953 resp->open_stateid = sp->rs_stateid.stateid;
7951 7954
7952 7955 rfs4_dbe_unlock(sp->rs_dbe);
7953 7956
7954 7957 *cs->statusp = resp->status = NFS4_OK;
7955 7958 /* Update the lease */
7956 7959 rfs4_update_lease(sp->rs_owner->ro_client);
7957 7960 /* And the sequence */
7958 7961 rfs4_update_open_sequence(sp->rs_owner);
7959 7962 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7960 7963
7961 7964 end:
7962 7965 rfs4_sw_exit(&sp->rs_owner->ro_sw);
7963 7966 rfs4_state_rele(sp);
7964 7967 out:
7965 7968 DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
7966 7969 OPEN_DOWNGRADE4res *, resp);
7967 7970 }
7968 7971
/*
 * Find the first occurrence of the NUL-terminated string s2 within the
 * first n bytes of the buffer s1.  The buffer is treated as raw bytes and
 * need not be NUL-terminated.  Returns a pointer to the start of the match,
 * or NULL when s2 does not occur entirely inside those n bytes.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
	const char *cur = s1;
	const size_t patlen = strlen(s2);

	/* Stop as soon as fewer than patlen bytes remain to be compared */
	for (; n >= patlen; cur++, n--) {
		if (bcmp(cur, s2, patlen) == 0)
			return ((void *)cur);
	}

	return (NULL);
}
7984 7987
7985 7988 /*
7986 7989 * The logic behind this function is detailed in the NFSv4 RFC in the
7987 7990 * SETCLIENTID operation description under IMPLEMENTATION. Refer to
7988 7991 * that section for explicit guidance to server behavior for
7989 7992 * SETCLIENTID.
7990 7993 */
7991 7994 void
7992 7995 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
7993 7996 struct svc_req *req, struct compound_state *cs)
7994 7997 {
7995 7998 SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
7996 7999 SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
7997 8000 rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
7998 8001 rfs4_clntip_t *ci;
7999 8002 bool_t create;
8000 8003 char *addr, *netid;
8001 8004 int len;
8002 8005
8003 8006 DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
8004 8007 SETCLIENTID4args *, args);
8005 8008 retry:
8006 8009 newcp = cp_confirmed = cp_unconfirmed = NULL;
8007 8010
8008 8011 /*
8009 8012 * Save the caller's IP address
8010 8013 */
8011 8014 args->client.cl_addr =
8012 8015 (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
8013 8016
8014 8017 /*
8015 8018 * Record if it is a Solaris client that cannot handle referrals.
8016 8019 */
8017 8020 if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
8018 8021 !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
8019 8022 /* Add a "yes, it's downrev" record */
8020 8023 create = TRUE;
8021 8024 ci = rfs4_find_clntip(args->client.cl_addr, &create);
8022 8025 ASSERT(ci != NULL);
8023 8026 rfs4_dbe_rele(ci->ri_dbe);
8024 8027 } else {
8025 8028 /* Remove any previous record */
8026 8029 rfs4_invalidate_clntip(args->client.cl_addr);
8027 8030 }
8028 8031
8029 8032 /*
8030 8033 * In search of an EXISTING client matching the incoming
8031 8034 * request to establish a new client identifier at the server
8032 8035 */
8033 8036 create = TRUE;
8034 8037 cp = rfs4_findclient(&args->client, &create, NULL);
8035 8038
8036 8039 /* Should never happen */
8037 8040 ASSERT(cp != NULL);
8038 8041
8039 8042 if (cp == NULL) {
8040 8043 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8041 8044 goto out;
8042 8045 }
8043 8046
8044 8047 /*
8045 8048 * Easiest case. Client identifier is newly created and is
8046 8049 * unconfirmed. Also note that for this case, no other
8047 8050 * entries exist for the client identifier. Nothing else to
8048 8051 * check. Just setup the response and respond.
8049 8052 */
8050 8053 if (create) {
8051 8054 *cs->statusp = res->status = NFS4_OK;
8052 8055 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
8053 8056 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8054 8057 cp->rc_confirm_verf;
8055 8058 /* Setup callback information; CB_NULL confirmation later */
8056 8059 rfs4_client_setcb(cp, &args->callback, args->callback_ident);
8057 8060
8058 8061 rfs4_client_rele(cp);
8059 8062 goto out;
8060 8063 }
8061 8064
8062 8065 /*
8063 8066 * An existing, confirmed client may exist but it may not have
8064 8067 * been active for at least one lease period. If so, then
8065 8068 * "close" the client and create a new client identifier
8066 8069 */
8067 8070 if (rfs4_lease_expired(cp)) {
8068 8071 rfs4_client_close(cp);
8069 8072 goto retry;
8070 8073 }
8071 8074
8072 8075 if (cp->rc_need_confirm == TRUE)
8073 8076 cp_unconfirmed = cp;
8074 8077 else
8075 8078 cp_confirmed = cp;
8076 8079
8077 8080 cp = NULL;
8078 8081
8079 8082 /*
8080 8083 * We have a confirmed client, now check for an
8081 8084 * unconfimred entry
8082 8085 */
8083 8086 if (cp_confirmed) {
8084 8087 /* If creds don't match then client identifier is inuse */
8085 8088 if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
8086 8089 rfs4_cbinfo_t *cbp;
8087 8090 /*
8088 8091 * Some one else has established this client
8089 8092 * id. Try and say * who they are. We will use
8090 8093 * the call back address supplied by * the
8091 8094 * first client.
8092 8095 */
8093 8096 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8094 8097
8095 8098 addr = netid = NULL;
8096 8099
8097 8100 cbp = &cp_confirmed->rc_cbinfo;
8098 8101 if (cbp->cb_callback.cb_location.r_addr &&
8099 8102 cbp->cb_callback.cb_location.r_netid) {
8100 8103 cb_client4 *cbcp = &cbp->cb_callback;
8101 8104
8102 8105 len = strlen(cbcp->cb_location.r_addr)+1;
8103 8106 addr = kmem_alloc(len, KM_SLEEP);
8104 8107 bcopy(cbcp->cb_location.r_addr, addr, len);
8105 8108 len = strlen(cbcp->cb_location.r_netid)+1;
8106 8109 netid = kmem_alloc(len, KM_SLEEP);
8107 8110 bcopy(cbcp->cb_location.r_netid, netid, len);
8108 8111 }
8109 8112
8110 8113 res->SETCLIENTID4res_u.client_using.r_addr = addr;
8111 8114 res->SETCLIENTID4res_u.client_using.r_netid = netid;
8112 8115
8113 8116 rfs4_client_rele(cp_confirmed);
8114 8117 }
8115 8118
8116 8119 /*
8117 8120 * Confirmed, creds match, and verifier matches; must
8118 8121 * be an update of the callback info
8119 8122 */
8120 8123 if (cp_confirmed->rc_nfs_client.verifier ==
8121 8124 args->client.verifier) {
8122 8125 /* Setup callback information */
8123 8126 rfs4_client_setcb(cp_confirmed, &args->callback,
8124 8127 args->callback_ident);
8125 8128
8126 8129 /* everything okay -- move ahead */
8127 8130 *cs->statusp = res->status = NFS4_OK;
8128 8131 res->SETCLIENTID4res_u.resok4.clientid =
8129 8132 cp_confirmed->rc_clientid;
8130 8133
8131 8134 /* update the confirm_verifier and return it */
8132 8135 rfs4_client_scv_next(cp_confirmed);
8133 8136 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8134 8137 cp_confirmed->rc_confirm_verf;
8135 8138
8136 8139 rfs4_client_rele(cp_confirmed);
8137 8140 goto out;
8138 8141 }
8139 8142
8140 8143 /*
8141 8144 * Creds match but the verifier doesn't. Must search
8142 8145 * for an unconfirmed client that would be replaced by
8143 8146 * this request.
8144 8147 */
8145 8148 create = FALSE;
8146 8149 cp_unconfirmed = rfs4_findclient(&args->client, &create,
8147 8150 cp_confirmed);
8148 8151 }
8149 8152
8150 8153 /*
8151 8154 * At this point, we have taken care of the brand new client
8152 8155 * struct, INUSE case, update of an existing, and confirmed
8153 8156 * client struct.
8154 8157 */
8155 8158
8156 8159 /*
8157 8160 * check to see if things have changed while we originally
8158 8161 * picked up the client struct. If they have, then return and
8159 8162 * retry the processing of this SETCLIENTID request.
8160 8163 */
8161 8164 if (cp_unconfirmed) {
8162 8165 rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8163 8166 if (!cp_unconfirmed->rc_need_confirm) {
8164 8167 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8165 8168 rfs4_client_rele(cp_unconfirmed);
8166 8169 if (cp_confirmed)
8167 8170 rfs4_client_rele(cp_confirmed);
8168 8171 goto retry;
8169 8172 }
8170 8173 /* do away with the old unconfirmed one */
8171 8174 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8172 8175 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8173 8176 rfs4_client_rele(cp_unconfirmed);
8174 8177 cp_unconfirmed = NULL;
8175 8178 }
8176 8179
8177 8180 /*
8178 8181 * This search will temporarily hide the confirmed client
8179 8182 * struct while a new client struct is created as the
8180 8183 * unconfirmed one.
8181 8184 */
8182 8185 create = TRUE;
8183 8186 newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8184 8187
8185 8188 ASSERT(newcp != NULL);
8186 8189
8187 8190 if (newcp == NULL) {
8188 8191 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8189 8192 rfs4_client_rele(cp_confirmed);
8190 8193 goto out;
8191 8194 }
8192 8195
8193 8196 /*
8194 8197 * If one was not created, then a similar request must be in
8195 8198 * process so release and start over with this one
8196 8199 */
8197 8200 if (create != TRUE) {
8198 8201 rfs4_client_rele(newcp);
8199 8202 if (cp_confirmed)
8200 8203 rfs4_client_rele(cp_confirmed);
8201 8204 goto retry;
8202 8205 }
8203 8206
8204 8207 *cs->statusp = res->status = NFS4_OK;
8205 8208 res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8206 8209 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8207 8210 newcp->rc_confirm_verf;
8208 8211 /* Setup callback information; CB_NULL confirmation later */
8209 8212 rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8210 8213
8211 8214 newcp->rc_cp_confirmed = cp_confirmed;
8212 8215
8213 8216 rfs4_client_rele(newcp);
8214 8217
8215 8218 out:
8216 8219 DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8217 8220 SETCLIENTID4res *, res);
8218 8221 }
8219 8222
8220 8223 /*ARGSUSED*/
8221 8224 void
8222 8225 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8223 8226 struct svc_req *req, struct compound_state *cs)
8224 8227 {
8225 8228 SETCLIENTID_CONFIRM4args *args =
|
↓ open down ↓ |
1556 lines elided |
↑ open up ↑ |
8226 8229 &argop->nfs_argop4_u.opsetclientid_confirm;
8227 8230 SETCLIENTID_CONFIRM4res *res =
8228 8231 &resop->nfs_resop4_u.opsetclientid_confirm;
8229 8232 rfs4_client_t *cp, *cptoclose = NULL;
8230 8233 nfs4_srv_t *nsrv4;
8231 8234
8232 8235 DTRACE_NFSV4_2(op__setclientid__confirm__start,
8233 8236 struct compound_state *, cs,
8234 8237 SETCLIENTID_CONFIRM4args *, args);
8235 8238
8236 - nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
8239 + nsrv4 = nfs4_get_srv();
8237 8240 *cs->statusp = res->status = NFS4_OK;
8238 8241
8239 8242 cp = rfs4_findclient_by_id(args->clientid, TRUE);
8240 8243
8241 8244 if (cp == NULL) {
8242 8245 *cs->statusp = res->status =
8243 8246 rfs4_check_clientid(&args->clientid, 1);
8244 8247 goto out;
8245 8248 }
8246 8249
8247 8250 if (!creds_ok(cp, req, cs)) {
8248 8251 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8249 8252 rfs4_client_rele(cp);
8250 8253 goto out;
8251 8254 }
8252 8255
8253 8256 /* If the verifier doesn't match, the record doesn't match */
8254 8257 if (cp->rc_confirm_verf != args->setclientid_confirm) {
8255 8258 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8256 8259 rfs4_client_rele(cp);
8257 8260 goto out;
8258 8261 }
8259 8262
8260 8263 rfs4_dbe_lock(cp->rc_dbe);
8261 8264 cp->rc_need_confirm = FALSE;
8262 8265 if (cp->rc_cp_confirmed) {
8263 8266 cptoclose = cp->rc_cp_confirmed;
8264 8267 cptoclose->rc_ss_remove = 1;
8265 8268 cp->rc_cp_confirmed = NULL;
8266 8269 }
8267 8270
8268 8271 /*
8269 8272 * Update the client's associated server instance, if it's changed
8270 8273 * since the client was created.
8271 8274 */
8272 8275 if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst)
8273 8276 rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
8274 8277
8275 8278 /*
8276 8279 * Record clientid in stable storage.
8277 8280 * Must be done after server instance has been assigned.
8278 8281 */
8279 8282 rfs4_ss_clid(nsrv4, cp);
8280 8283
8281 8284 rfs4_dbe_unlock(cp->rc_dbe);
8282 8285
8283 8286 if (cptoclose)
8284 8287 /* don't need to rele, client_close does it */
8285 8288 rfs4_client_close(cptoclose);
8286 8289
8287 8290 /* If needed, initiate CB_NULL call for callback path */
8288 8291 rfs4_deleg_cb_check(cp);
8289 8292 rfs4_update_lease(cp);
8290 8293
8291 8294 /*
8292 8295 * Check to see if client can perform reclaims
8293 8296 */
8294 8297 rfs4_ss_chkclid(nsrv4, cp);
8295 8298
8296 8299 rfs4_client_rele(cp);
8297 8300
8298 8301 out:
8299 8302 DTRACE_NFSV4_2(op__setclientid__confirm__done,
8300 8303 struct compound_state *, cs,
8301 8304 SETCLIENTID_CONFIRM4 *, res);
8302 8305 }
8303 8306
8304 8307
8305 8308 /*ARGSUSED*/
8306 8309 void
8307 8310 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8308 8311 struct svc_req *req, struct compound_state *cs)
8309 8312 {
8310 8313 CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8311 8314 CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8312 8315 rfs4_state_t *sp;
8313 8316 nfsstat4 status;
8314 8317
8315 8318 DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8316 8319 CLOSE4args *, args);
8317 8320
8318 8321 if (cs->vp == NULL) {
8319 8322 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8320 8323 goto out;
8321 8324 }
8322 8325
8323 8326 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8324 8327 if (status != NFS4_OK) {
8325 8328 *cs->statusp = resp->status = status;
8326 8329 goto out;
8327 8330 }
8328 8331
8329 8332 /* Ensure specified filehandle matches */
8330 8333 if (cs->vp != sp->rs_finfo->rf_vp) {
8331 8334 rfs4_state_rele(sp);
8332 8335 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8333 8336 goto out;
8334 8337 }
8335 8338
8336 8339 /* hold off other access to open_owner while we tinker */
8337 8340 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8338 8341
8339 8342 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
8340 8343 case NFS4_CHECK_STATEID_OKAY:
8341 8344 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8342 8345 resop) != NFS4_CHKSEQ_OKAY) {
8343 8346 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8344 8347 goto end;
8345 8348 }
8346 8349 break;
8347 8350 case NFS4_CHECK_STATEID_OLD:
8348 8351 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8349 8352 goto end;
8350 8353 case NFS4_CHECK_STATEID_BAD:
8351 8354 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8352 8355 goto end;
8353 8356 case NFS4_CHECK_STATEID_EXPIRED:
8354 8357 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8355 8358 goto end;
8356 8359 case NFS4_CHECK_STATEID_CLOSED:
8357 8360 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8358 8361 goto end;
8359 8362 case NFS4_CHECK_STATEID_UNCONFIRMED:
8360 8363 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8361 8364 goto end;
8362 8365 case NFS4_CHECK_STATEID_REPLAY:
8363 8366 /* Check the sequence id for the open owner */
8364 8367 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8365 8368 resop)) {
8366 8369 case NFS4_CHKSEQ_OKAY:
8367 8370 /*
8368 8371 * This is replayed stateid; if seqid matches
8369 8372 * next expected, then client is using wrong seqid.
8370 8373 */
8371 8374 /* FALL THROUGH */
8372 8375 case NFS4_CHKSEQ_BAD:
8373 8376 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8374 8377 goto end;
8375 8378 case NFS4_CHKSEQ_REPLAY:
8376 8379 /*
8377 8380 * Note this case is the duplicate case so
8378 8381 * resp->status is already set.
8379 8382 */
8380 8383 *cs->statusp = resp->status;
8381 8384 rfs4_update_lease(sp->rs_owner->ro_client);
8382 8385 goto end;
8383 8386 }
8384 8387 break;
8385 8388 default:
8386 8389 ASSERT(FALSE);
8387 8390 break;
8388 8391 }
8389 8392
8390 8393 rfs4_dbe_lock(sp->rs_dbe);
8391 8394
8392 8395 /* Update the stateid. */
8393 8396 next_stateid(&sp->rs_stateid);
8394 8397 resp->open_stateid = sp->rs_stateid.stateid;
8395 8398
8396 8399 rfs4_dbe_unlock(sp->rs_dbe);
8397 8400
8398 8401 rfs4_update_lease(sp->rs_owner->ro_client);
8399 8402 rfs4_update_open_sequence(sp->rs_owner);
8400 8403 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8401 8404
8402 8405 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8403 8406
8404 8407 *cs->statusp = resp->status = status;
8405 8408
8406 8409 end:
8407 8410 rfs4_sw_exit(&sp->rs_owner->ro_sw);
8408 8411 rfs4_state_rele(sp);
8409 8412 out:
8410 8413 DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8411 8414 CLOSE4res *, resp);
8412 8415 }
8413 8416
8414 8417 /*
8415 8418 * Manage the counts on the file struct and close all file locks
8416 8419 */
8417 8420 /*ARGSUSED*/
8418 8421 void
8419 8422 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8420 8423 bool_t close_of_client)
8421 8424 {
8422 8425 rfs4_file_t *fp = sp->rs_finfo;
8423 8426 rfs4_lo_state_t *lsp;
8424 8427 int fflags = 0;
8425 8428
8426 8429 /*
8427 8430 * If this call is part of the larger closing down of client
8428 8431 * state then it is just easier to release all locks
8429 8432 * associated with this client instead of going through each
8430 8433 * individual file and cleaning locks there.
8431 8434 */
8432 8435 if (close_of_client) {
8433 8436 if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8434 8437 !list_is_empty(&sp->rs_lostatelist) &&
8435 8438 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8436 8439 /* Is the PxFS kernel module loaded? */
8437 8440 if (lm_remove_file_locks != NULL) {
8438 8441 int new_sysid;
8439 8442
8440 8443 /* Encode the cluster nodeid in new sysid */
8441 8444 new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8442 8445 lm_set_nlmid_flk(&new_sysid);
8443 8446
8444 8447 /*
8445 8448 * This PxFS routine removes file locks for a
8446 8449 * client over all nodes of a cluster.
8447 8450 */
8448 8451 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8449 8452 "lm_remove_file_locks(sysid=0x%x)\n",
8450 8453 new_sysid));
8451 8454 (*lm_remove_file_locks)(new_sysid);
8452 8455 } else {
8453 8456 struct flock64 flk;
8454 8457
8455 8458 /* Release all locks for this client */
8456 8459 flk.l_type = F_UNLKSYS;
8457 8460 flk.l_whence = 0;
8458 8461 flk.l_start = 0;
8459 8462 flk.l_len = 0;
8460 8463 flk.l_sysid =
8461 8464 sp->rs_owner->ro_client->rc_sysidt;
8462 8465 flk.l_pid = 0;
8463 8466 (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8464 8467 &flk, F_REMOTELOCK | FREAD | FWRITE,
8465 8468 (u_offset_t)0, NULL, CRED(), NULL);
8466 8469 }
8467 8470
8468 8471 sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8469 8472 }
8470 8473 }
8471 8474
8472 8475 /*
8473 8476 * Release all locks on this file by this lock owner or at
8474 8477 * least mark the locks as having been released
8475 8478 */
8476 8479 for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8477 8480 lsp = list_next(&sp->rs_lostatelist, lsp)) {
8478 8481 lsp->rls_locks_cleaned = TRUE;
8479 8482
8480 8483 /* Was this already taken care of above? */
8481 8484 if (!close_of_client &&
8482 8485 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8483 8486 (void) cleanlocks(sp->rs_finfo->rf_vp,
8484 8487 lsp->rls_locker->rl_pid,
8485 8488 lsp->rls_locker->rl_client->rc_sysidt);
8486 8489 }
8487 8490
8488 8491 /*
8489 8492 * Release any shrlocks associated with this open state ID.
8490 8493 * This must be done before the rfs4_state gets marked closed.
8491 8494 */
8492 8495 if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8493 8496 (void) rfs4_unshare(sp);
8494 8497
8495 8498 if (sp->rs_open_access) {
8496 8499 rfs4_dbe_lock(fp->rf_dbe);
8497 8500
8498 8501 /*
8499 8502 * Decrement the count for each access and deny bit that this
8500 8503 * state has contributed to the file.
8501 8504 * If the file counts go to zero
8502 8505 * clear the appropriate bit in the appropriate mask.
8503 8506 */
8504 8507 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8505 8508 fp->rf_access_read--;
8506 8509 fflags |= FREAD;
8507 8510 if (fp->rf_access_read == 0)
8508 8511 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8509 8512 }
8510 8513 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8511 8514 fp->rf_access_write--;
8512 8515 fflags |= FWRITE;
8513 8516 if (fp->rf_access_write == 0)
8514 8517 fp->rf_share_access &=
8515 8518 ~OPEN4_SHARE_ACCESS_WRITE;
8516 8519 }
8517 8520 if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8518 8521 fp->rf_deny_read--;
8519 8522 if (fp->rf_deny_read == 0)
8520 8523 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8521 8524 }
8522 8525 if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8523 8526 fp->rf_deny_write--;
8524 8527 if (fp->rf_deny_write == 0)
8525 8528 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8526 8529 }
8527 8530
8528 8531 (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8529 8532
8530 8533 rfs4_dbe_unlock(fp->rf_dbe);
8531 8534
8532 8535 sp->rs_open_access = 0;
8533 8536 sp->rs_open_deny = 0;
8534 8537 }
8535 8538 }
8536 8539
8537 8540 /*
8538 8541 * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8539 8542 */
8540 8543 static nfsstat4
8541 8544 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8542 8545 {
8543 8546 rfs4_lockowner_t *lo;
8544 8547 rfs4_client_t *cp;
8545 8548 uint32_t len;
8546 8549
8547 8550 lo = rfs4_findlockowner_by_pid(flk->l_pid);
8548 8551 if (lo != NULL) {
8549 8552 cp = lo->rl_client;
8550 8553 if (rfs4_lease_expired(cp)) {
8551 8554 rfs4_lockowner_rele(lo);
8552 8555 rfs4_dbe_hold(cp->rc_dbe);
8553 8556 rfs4_client_close(cp);
8554 8557 return (NFS4ERR_EXPIRED);
8555 8558 }
8556 8559 dp->owner.clientid = lo->rl_owner.clientid;
8557 8560 len = lo->rl_owner.owner_len;
8558 8561 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8559 8562 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8560 8563 dp->owner.owner_len = len;
8561 8564 rfs4_lockowner_rele(lo);
8562 8565 goto finish;
8563 8566 }
8564 8567
8565 8568 /*
8566 8569 * Its not a NFS4 lock. We take advantage that the upper 32 bits
8567 8570 * of the client id contain the boot time for a NFS4 lock. So we
8568 8571 * fabricate and identity by setting clientid to the sysid, and
8569 8572 * the lock owner to the pid.
8570 8573 */
8571 8574 dp->owner.clientid = flk->l_sysid;
8572 8575 len = sizeof (pid_t);
8573 8576 dp->owner.owner_len = len;
8574 8577 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8575 8578 bcopy(&flk->l_pid, dp->owner.owner_val, len);
8576 8579 finish:
8577 8580 dp->offset = flk->l_start;
8578 8581 dp->length = flk->l_len;
8579 8582
8580 8583 if (flk->l_type == F_RDLCK)
8581 8584 dp->locktype = READ_LT;
8582 8585 else if (flk->l_type == F_WRLCK)
8583 8586 dp->locktype = WRITE_LT;
8584 8587 else
8585 8588 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8586 8589
8587 8590 return (NFS4_OK);
8588 8591 }
8589 8592
8590 8593 /*
8591 8594 * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8592 8595 * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a
8593 8596 * case the lock is denied by the NFSv4.0 server. NFSv4.0 clients are prepared
8594 8597 * for that (obviously); they are sending the LOCK requests with some delays
8595 8598 * between the attempts. See nfs4frlock() and nfs4_block_and_wait() for the
8596 8599 * locking and delay implementation at the client side.
8597 8600 *
8598 8601 * To make the life of the clients easier, the NFSv4.0 server tries to do some
8599 8602 * fast retries on its own (the for loop below) in a hope the lock will be
8600 8603 * available soon. And if not, the client won't need to resend the LOCK
8601 8604 * requests so fast to check the lock availability. This basically saves some
8602 8605 * network traffic and tries to make sure the client gets the lock ASAP.
8603 8606 */
8604 8607 static int
8605 8608 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8606 8609 {
8607 8610 int error;
8608 8611 struct flock64 flk;
8609 8612 int i;
8610 8613 clock_t delaytime;
8611 8614 int cmd;
8612 8615 int spin_cnt = 0;
8613 8616
8614 8617 cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8615 8618 retry:
8616 8619 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8617 8620
8618 8621 for (i = 0; i < rfs4_maxlock_tries; i++) {
8619 8622 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8620 8623 error = VOP_FRLOCK(vp, cmd,
8621 8624 flock, flag, (u_offset_t)0, NULL, cred, NULL);
8622 8625
8623 8626 if (error != EAGAIN && error != EACCES)
8624 8627 break;
8625 8628
8626 8629 if (i < rfs4_maxlock_tries - 1) {
8627 8630 delay(delaytime);
8628 8631 delaytime *= 2;
8629 8632 }
8630 8633 }
8631 8634
8632 8635 if (error == EAGAIN || error == EACCES) {
8633 8636 /* Get the owner of the lock */
8634 8637 flk = *flock;
8635 8638 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8636 8639 if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8637 8640 NULL) == 0) {
8638 8641 /*
8639 8642 * There's a race inherent in the current VOP_FRLOCK
8640 8643 * design where:
8641 8644 * a: "other guy" takes a lock that conflicts with a
8642 8645 * lock we want
8643 8646 * b: we attempt to take our lock (non-blocking) and
8644 8647 * the attempt fails.
8645 8648 * c: "other guy" releases the conflicting lock
8646 8649 * d: we ask what lock conflicts with the lock we want,
8647 8650 * getting F_UNLCK (no lock blocks us)
8648 8651 *
8649 8652 * If we retry the non-blocking lock attempt in this
8650 8653 * case (restart at step 'b') there's some possibility
8651 8654 * that many such attempts might fail. However a test
8652 8655 * designed to actually provoke this race shows that
8653 8656 * the vast majority of cases require no retry, and
8654 8657 * only a few took as many as three retries. Here's
8655 8658 * the test outcome:
8656 8659 *
8657 8660 * number of retries how many times we needed
8658 8661 * that many retries
8659 8662 * 0 79461
8660 8663 * 1 862
8661 8664 * 2 49
8662 8665 * 3 5
8663 8666 *
8664 8667 * Given those empirical results, we arbitrarily limit
8665 8668 * the retry count to ten.
8666 8669 *
8667 8670 * If we actually make to ten retries and give up,
8668 8671 * nothing catastrophic happens, but we're unable to
8669 8672 * return the information about the conflicting lock to
8670 8673 * the NFS client. That's an acceptable trade off vs.
8671 8674 * letting this retry loop run forever.
8672 8675 */
8673 8676 if (flk.l_type == F_UNLCK) {
8674 8677 if (spin_cnt++ < 10) {
8675 8678 /* No longer locked, retry */
8676 8679 goto retry;
8677 8680 }
8678 8681 } else {
8679 8682 *flock = flk;
8680 8683 LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8681 8684 F_GETLK, &flk);
8682 8685 }
8683 8686 }
8684 8687 }
8685 8688
8686 8689 return (error);
8687 8690 }
8688 8691
8689 8692 /*ARGSUSED*/
8690 8693 static nfsstat4
8691 8694 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8692 8695 offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8693 8696 {
8694 8697 nfsstat4 status;
8695 8698 rfs4_lockowner_t *lo = lsp->rls_locker;
8696 8699 rfs4_state_t *sp = lsp->rls_state;
8697 8700 struct flock64 flock;
8698 8701 int16_t ltype;
8699 8702 int flag;
8700 8703 int error;
8701 8704 sysid_t sysid;
8702 8705 LOCK4res *lres;
8703 8706 vnode_t *vp;
8704 8707
8705 8708 if (rfs4_lease_expired(lo->rl_client)) {
8706 8709 return (NFS4ERR_EXPIRED);
8707 8710 }
8708 8711
8709 8712 if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8710 8713 return (status);
8711 8714
8712 8715 /* Check for zero length. To lock to end of file use all ones for V4 */
8713 8716 if (length == 0)
8714 8717 return (NFS4ERR_INVAL);
8715 8718 else if (length == (length4)(~0))
8716 8719 length = 0; /* Posix to end of file */
8717 8720
8718 8721 retry:
8719 8722 rfs4_dbe_lock(sp->rs_dbe);
8720 8723 if (sp->rs_closed == TRUE) {
8721 8724 rfs4_dbe_unlock(sp->rs_dbe);
8722 8725 return (NFS4ERR_OLD_STATEID);
8723 8726 }
8724 8727
8725 8728 if (resop->resop != OP_LOCKU) {
8726 8729 switch (locktype) {
8727 8730 case READ_LT:
8728 8731 case READW_LT:
8729 8732 if ((sp->rs_share_access
8730 8733 & OPEN4_SHARE_ACCESS_READ) == 0) {
8731 8734 rfs4_dbe_unlock(sp->rs_dbe);
8732 8735
8733 8736 return (NFS4ERR_OPENMODE);
8734 8737 }
8735 8738 ltype = F_RDLCK;
8736 8739 break;
8737 8740 case WRITE_LT:
8738 8741 case WRITEW_LT:
8739 8742 if ((sp->rs_share_access
8740 8743 & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8741 8744 rfs4_dbe_unlock(sp->rs_dbe);
8742 8745
8743 8746 return (NFS4ERR_OPENMODE);
8744 8747 }
8745 8748 ltype = F_WRLCK;
8746 8749 break;
8747 8750 }
8748 8751 } else
8749 8752 ltype = F_UNLCK;
8750 8753
8751 8754 flock.l_type = ltype;
8752 8755 flock.l_whence = 0; /* SEEK_SET */
8753 8756 flock.l_start = offset;
8754 8757 flock.l_len = length;
8755 8758 flock.l_sysid = sysid;
8756 8759 flock.l_pid = lsp->rls_locker->rl_pid;
8757 8760
8758 8761 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8759 8762 if (flock.l_len < 0 || flock.l_start < 0) {
8760 8763 rfs4_dbe_unlock(sp->rs_dbe);
8761 8764 return (NFS4ERR_INVAL);
8762 8765 }
8763 8766
8764 8767 /*
8765 8768 * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
8766 8769 * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
8767 8770 */
8768 8771 flag = (int)sp->rs_share_access | F_REMOTELOCK;
8769 8772
8770 8773 vp = sp->rs_finfo->rf_vp;
8771 8774 VN_HOLD(vp);
8772 8775
8773 8776 /*
8774 8777 * We need to unlock sp before we call the underlying filesystem to
8775 8778 * acquire the file lock.
8776 8779 */
8777 8780 rfs4_dbe_unlock(sp->rs_dbe);
8778 8781
8779 8782 error = setlock(vp, &flock, flag, cred);
8780 8783
8781 8784 /*
8782 8785 * Make sure the file is still open. In a case the file was closed in
8783 8786 * the meantime, clean the lock we acquired using the setlock() call
8784 8787 * above, and return the appropriate error.
8785 8788 */
8786 8789 rfs4_dbe_lock(sp->rs_dbe);
8787 8790 if (sp->rs_closed == TRUE) {
8788 8791 cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
8789 8792 rfs4_dbe_unlock(sp->rs_dbe);
8790 8793
8791 8794 VN_RELE(vp);
8792 8795
8793 8796 return (NFS4ERR_OLD_STATEID);
8794 8797 }
8795 8798 rfs4_dbe_unlock(sp->rs_dbe);
8796 8799
8797 8800 VN_RELE(vp);
8798 8801
8799 8802 if (error == 0) {
8800 8803 rfs4_dbe_lock(lsp->rls_dbe);
8801 8804 next_stateid(&lsp->rls_lockid);
8802 8805 rfs4_dbe_unlock(lsp->rls_dbe);
8803 8806 }
8804 8807
8805 8808 /*
8806 8809 * N.B. We map error values to nfsv4 errors. This is differrent
8807 8810 * than puterrno4 routine.
8808 8811 */
8809 8812 switch (error) {
8810 8813 case 0:
8811 8814 status = NFS4_OK;
8812 8815 break;
8813 8816 case EAGAIN:
8814 8817 case EACCES: /* Old value */
8815 8818 /* Can only get here if op is OP_LOCK */
8816 8819 ASSERT(resop->resop == OP_LOCK);
8817 8820 lres = &resop->nfs_resop4_u.oplock;
8818 8821 status = NFS4ERR_DENIED;
8819 8822 if (lock_denied(&lres->LOCK4res_u.denied, &flock)
8820 8823 == NFS4ERR_EXPIRED)
8821 8824 goto retry;
8822 8825 break;
8823 8826 case ENOLCK:
8824 8827 status = NFS4ERR_DELAY;
8825 8828 break;
8826 8829 case EOVERFLOW:
8827 8830 status = NFS4ERR_INVAL;
8828 8831 break;
8829 8832 case EINVAL:
8830 8833 status = NFS4ERR_NOTSUPP;
8831 8834 break;
8832 8835 default:
8833 8836 status = NFS4ERR_SERVERFAULT;
8834 8837 break;
8835 8838 }
8836 8839
8837 8840 return (status);
8838 8841 }
8839 8842
8840 8843 /*ARGSUSED*/
8841 8844 void
8842 8845 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
8843 8846 struct svc_req *req, struct compound_state *cs)
8844 8847 {
8845 8848 LOCK4args *args = &argop->nfs_argop4_u.oplock;
8846 8849 LOCK4res *resp = &resop->nfs_resop4_u.oplock;
8847 8850 nfsstat4 status;
8848 8851 stateid4 *stateid;
8849 8852 rfs4_lockowner_t *lo;
8850 8853 rfs4_client_t *cp;
8851 8854 rfs4_state_t *sp = NULL;
8852 8855 rfs4_lo_state_t *lsp = NULL;
8853 8856 bool_t ls_sw_held = FALSE;
8854 8857 bool_t create = TRUE;
8855 8858 bool_t lcreate = TRUE;
8856 8859 bool_t dup_lock = FALSE;
8857 8860 int rc;
8858 8861
8859 8862 DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
8860 8863 LOCK4args *, args);
8861 8864
8862 8865 if (cs->vp == NULL) {
8863 8866 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8864 8867 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8865 8868 cs, LOCK4res *, resp);
8866 8869 return;
8867 8870 }
8868 8871
8869 8872 if (args->locker.new_lock_owner) {
8870 8873 /* Create a new lockowner for this instance */
8871 8874 open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
8872 8875
8873 8876 NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
8874 8877
8875 8878 stateid = &olo->open_stateid;
8876 8879 status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
8877 8880 if (status != NFS4_OK) {
8878 8881 NFS4_DEBUG(rfs4_debug,
8879 8882 (CE_NOTE, "Get state failed in lock %d", status));
8880 8883 *cs->statusp = resp->status = status;
8881 8884 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8882 8885 cs, LOCK4res *, resp);
8883 8886 return;
8884 8887 }
8885 8888
8886 8889 /* Ensure specified filehandle matches */
8887 8890 if (cs->vp != sp->rs_finfo->rf_vp) {
8888 8891 rfs4_state_rele(sp);
8889 8892 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8890 8893 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8891 8894 cs, LOCK4res *, resp);
8892 8895 return;
8893 8896 }
8894 8897
8895 8898 /* hold off other access to open_owner while we tinker */
8896 8899 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8897 8900
8898 8901 switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
8899 8902 case NFS4_CHECK_STATEID_OLD:
8900 8903 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8901 8904 goto end;
8902 8905 case NFS4_CHECK_STATEID_BAD:
8903 8906 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8904 8907 goto end;
8905 8908 case NFS4_CHECK_STATEID_EXPIRED:
8906 8909 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8907 8910 goto end;
8908 8911 case NFS4_CHECK_STATEID_UNCONFIRMED:
8909 8912 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8910 8913 goto end;
8911 8914 case NFS4_CHECK_STATEID_CLOSED:
8912 8915 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8913 8916 goto end;
8914 8917 case NFS4_CHECK_STATEID_OKAY:
8915 8918 case NFS4_CHECK_STATEID_REPLAY:
8916 8919 switch (rfs4_check_olo_seqid(olo->open_seqid,
8917 8920 sp->rs_owner, resop)) {
8918 8921 case NFS4_CHKSEQ_OKAY:
8919 8922 if (rc == NFS4_CHECK_STATEID_OKAY)
8920 8923 break;
8921 8924 /*
8922 8925 * This is replayed stateid; if seqid
8923 8926 * matches next expected, then client
8924 8927 * is using wrong seqid.
8925 8928 */
8926 8929 /* FALLTHROUGH */
8927 8930 case NFS4_CHKSEQ_BAD:
8928 8931 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8929 8932 goto end;
8930 8933 case NFS4_CHKSEQ_REPLAY:
8931 8934 /* This is a duplicate LOCK request */
8932 8935 dup_lock = TRUE;
8933 8936
8934 8937 /*
8935 8938 * For a duplicate we do not want to
8936 8939 * create a new lockowner as it should
8937 8940 * already exist.
8938 8941 * Turn off the lockowner create flag.
8939 8942 */
8940 8943 lcreate = FALSE;
8941 8944 }
8942 8945 break;
8943 8946 }
8944 8947
8945 8948 lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
8946 8949 if (lo == NULL) {
8947 8950 NFS4_DEBUG(rfs4_debug,
8948 8951 (CE_NOTE, "rfs4_op_lock: no lock owner"));
8949 8952 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
8950 8953 goto end;
8951 8954 }
8952 8955
8953 8956 lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
8954 8957 if (lsp == NULL) {
8955 8958 rfs4_update_lease(sp->rs_owner->ro_client);
8956 8959 /*
8957 8960 * Only update theh open_seqid if this is not
8958 8961 * a duplicate request
8959 8962 */
8960 8963 if (dup_lock == FALSE) {
8961 8964 rfs4_update_open_sequence(sp->rs_owner);
8962 8965 }
8963 8966
8964 8967 NFS4_DEBUG(rfs4_debug,
8965 8968 (CE_NOTE, "rfs4_op_lock: no state"));
8966 8969 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8967 8970 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8968 8971 rfs4_lockowner_rele(lo);
8969 8972 goto end;
8970 8973 }
8971 8974
8972 8975 /*
8973 8976 * This is the new_lock_owner branch and the client is
8974 8977 * supposed to be associating a new lock_owner with
8975 8978 * the open file at this point. If we find that a
8976 8979 * lock_owner/state association already exists and a
8977 8980 * successful LOCK request was returned to the client,
8978 8981 * an error is returned to the client since this is
8979 8982 * not appropriate. The client should be using the
8980 8983 * existing lock_owner branch.
8981 8984 */
8982 8985 if (dup_lock == FALSE && create == FALSE) {
8983 8986 if (lsp->rls_lock_completed == TRUE) {
8984 8987 *cs->statusp =
8985 8988 resp->status = NFS4ERR_BAD_SEQID;
8986 8989 rfs4_lockowner_rele(lo);
8987 8990 goto end;
8988 8991 }
8989 8992 }
8990 8993
8991 8994 rfs4_update_lease(sp->rs_owner->ro_client);
8992 8995
8993 8996 /*
8994 8997 * Only update theh open_seqid if this is not
8995 8998 * a duplicate request
8996 8999 */
8997 9000 if (dup_lock == FALSE) {
8998 9001 rfs4_update_open_sequence(sp->rs_owner);
8999 9002 }
9000 9003
9001 9004 /*
9002 9005 * If this is a duplicate lock request, just copy the
9003 9006 * previously saved reply and return.
9004 9007 */
9005 9008 if (dup_lock == TRUE) {
9006 9009 /* verify that lock_seqid's match */
9007 9010 if (lsp->rls_seqid != olo->lock_seqid) {
9008 9011 NFS4_DEBUG(rfs4_debug,
9009 9012 (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
9010 9013 "lsp->seqid=%d old->seqid=%d",
9011 9014 lsp->rls_seqid, olo->lock_seqid));
9012 9015 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9013 9016 } else {
9014 9017 rfs4_copy_reply(resop, &lsp->rls_reply);
9015 9018 /*
9016 9019 * Make sure to copy the just
9017 9020 * retrieved reply status into the
9018 9021 * overall compound status
9019 9022 */
9020 9023 *cs->statusp = resp->status;
9021 9024 }
9022 9025 rfs4_lockowner_rele(lo);
9023 9026 goto end;
9024 9027 }
9025 9028
9026 9029 rfs4_dbe_lock(lsp->rls_dbe);
9027 9030
9028 9031 /* Make sure to update the lock sequence id */
9029 9032 lsp->rls_seqid = olo->lock_seqid;
9030 9033
9031 9034 NFS4_DEBUG(rfs4_debug,
9032 9035 (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
9033 9036
9034 9037 /*
9035 9038 * This is used to signify the newly created lockowner
9036 9039 * stateid and its sequence number. The checks for
9037 9040 * sequence number and increment don't occur on the
9038 9041 * very first lock request for a lockowner.
9039 9042 */
9040 9043 lsp->rls_skip_seqid_check = TRUE;
9041 9044
9042 9045 /* hold off other access to lsp while we tinker */
9043 9046 rfs4_sw_enter(&lsp->rls_sw);
9044 9047 ls_sw_held = TRUE;
9045 9048
9046 9049 rfs4_dbe_unlock(lsp->rls_dbe);
9047 9050
9048 9051 rfs4_lockowner_rele(lo);
9049 9052 } else {
9050 9053 stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
9051 9054 /* get lsp and hold the lock on the underlying file struct */
9052 9055 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
9053 9056 != NFS4_OK) {
9054 9057 *cs->statusp = resp->status = status;
9055 9058 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9056 9059 cs, LOCK4res *, resp);
9057 9060 return;
9058 9061 }
9059 9062 create = FALSE; /* We didn't create lsp */
9060 9063
9061 9064 /* Ensure specified filehandle matches */
9062 9065 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9063 9066 rfs4_lo_state_rele(lsp, TRUE);
9064 9067 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9065 9068 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9066 9069 cs, LOCK4res *, resp);
9067 9070 return;
9068 9071 }
9069 9072
9070 9073 /* hold off other access to lsp while we tinker */
9071 9074 rfs4_sw_enter(&lsp->rls_sw);
9072 9075 ls_sw_held = TRUE;
9073 9076
9074 9077 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9075 9078 /*
9076 9079 * The stateid looks like it was okay (expected to be
9077 9080 * the next one)
9078 9081 */
9079 9082 case NFS4_CHECK_STATEID_OKAY:
9080 9083 /*
9081 9084 * The sequence id is now checked. Determine
9082 9085 * if this is a replay or if it is in the
9083 9086 * expected (next) sequence. In the case of a
9084 9087 * replay, there are two replay conditions
9085 9088 * that may occur. The first is the normal
9086 9089 * condition where a LOCK is done with a
9087 9090 * NFS4_OK response and the stateid is
9088 9091 * updated. That case is handled below when
9089 9092 * the stateid is identified as a REPLAY. The
9090 9093 * second is the case where an error is
9091 9094 * returned, like NFS4ERR_DENIED, and the
9092 9095 * sequence number is updated but the stateid
9093 9096 * is not updated. This second case is dealt
9094 9097 * with here. So it may seem odd that the
9095 9098 * stateid is okay but the sequence id is a
9096 9099 * replay but it is okay.
9097 9100 */
9098 9101 switch (rfs4_check_lock_seqid(
9099 9102 args->locker.locker4_u.lock_owner.lock_seqid,
9100 9103 lsp, resop)) {
9101 9104 case NFS4_CHKSEQ_REPLAY:
9102 9105 if (resp->status != NFS4_OK) {
9103 9106 /*
9104 9107 * Here is our replay and need
9105 9108 * to verify that the last
9106 9109 * response was an error.
9107 9110 */
9108 9111 *cs->statusp = resp->status;
9109 9112 goto end;
9110 9113 }
9111 9114 /*
9112 9115 * This is done since the sequence id
9113 9116 * looked like a replay but it didn't
9114 9117 * pass our check so a BAD_SEQID is
9115 9118 * returned as a result.
9116 9119 */
9117 9120 /*FALLTHROUGH*/
9118 9121 case NFS4_CHKSEQ_BAD:
9119 9122 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9120 9123 goto end;
9121 9124 case NFS4_CHKSEQ_OKAY:
9122 9125 /* Everything looks okay move ahead */
9123 9126 break;
9124 9127 }
9125 9128 break;
9126 9129 case NFS4_CHECK_STATEID_OLD:
9127 9130 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9128 9131 goto end;
9129 9132 case NFS4_CHECK_STATEID_BAD:
9130 9133 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9131 9134 goto end;
9132 9135 case NFS4_CHECK_STATEID_EXPIRED:
9133 9136 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9134 9137 goto end;
9135 9138 case NFS4_CHECK_STATEID_CLOSED:
9136 9139 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9137 9140 goto end;
9138 9141 case NFS4_CHECK_STATEID_REPLAY:
9139 9142 switch (rfs4_check_lock_seqid(
9140 9143 args->locker.locker4_u.lock_owner.lock_seqid,
9141 9144 lsp, resop)) {
9142 9145 case NFS4_CHKSEQ_OKAY:
9143 9146 /*
9144 9147 * This is a replayed stateid; if
9145 9148 * seqid matches the next expected,
9146 9149 * then client is using wrong seqid.
9147 9150 */
9148 9151 case NFS4_CHKSEQ_BAD:
9149 9152 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9150 9153 goto end;
9151 9154 case NFS4_CHKSEQ_REPLAY:
9152 9155 rfs4_update_lease(lsp->rls_locker->rl_client);
9153 9156 *cs->statusp = status = resp->status;
9154 9157 goto end;
9155 9158 }
9156 9159 break;
9157 9160 default:
9158 9161 ASSERT(FALSE);
9159 9162 break;
9160 9163 }
9161 9164
9162 9165 rfs4_update_lock_sequence(lsp);
9163 9166 rfs4_update_lease(lsp->rls_locker->rl_client);
9164 9167 }
9165 9168
9166 9169 /*
9167 9170 * NFS4 only allows locking on regular files, so
9168 9171 * verify type of object.
9169 9172 */
9170 9173 if (cs->vp->v_type != VREG) {
9171 9174 if (cs->vp->v_type == VDIR)
9172 9175 status = NFS4ERR_ISDIR;
9173 9176 else
9174 9177 status = NFS4ERR_INVAL;
9175 9178 goto out;
9176 9179 }
9177 9180
9178 9181 cp = lsp->rls_state->rs_owner->ro_client;
9179 9182
9180 9183 if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9181 9184 status = NFS4ERR_GRACE;
9182 9185 goto out;
9183 9186 }
9184 9187
9185 9188 if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9186 9189 status = NFS4ERR_NO_GRACE;
9187 9190 goto out;
9188 9191 }
9189 9192
9190 9193 if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9191 9194 status = NFS4ERR_NO_GRACE;
9192 9195 goto out;
9193 9196 }
9194 9197
9195 9198 if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9196 9199 cs->deleg = TRUE;
9197 9200
9198 9201 status = rfs4_do_lock(lsp, args->locktype,
9199 9202 args->offset, args->length, cs->cr, resop);
9200 9203
9201 9204 out:
9202 9205 lsp->rls_skip_seqid_check = FALSE;
9203 9206
9204 9207 *cs->statusp = resp->status = status;
9205 9208
9206 9209 if (status == NFS4_OK) {
9207 9210 resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9208 9211 lsp->rls_lock_completed = TRUE;
9209 9212 }
9210 9213 /*
9211 9214 * Only update the "OPEN" response here if this was a new
9212 9215 * lock_owner
9213 9216 */
9214 9217 if (sp)
9215 9218 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9216 9219
9217 9220 rfs4_update_lock_resp(lsp, resop);
9218 9221
9219 9222 end:
9220 9223 if (lsp) {
9221 9224 if (ls_sw_held)
9222 9225 rfs4_sw_exit(&lsp->rls_sw);
9223 9226 /*
9224 9227 * If an sp obtained, then the lsp does not represent
9225 9228 * a lock on the file struct.
9226 9229 */
9227 9230 if (sp != NULL)
9228 9231 rfs4_lo_state_rele(lsp, FALSE);
9229 9232 else
9230 9233 rfs4_lo_state_rele(lsp, TRUE);
9231 9234 }
9232 9235 if (sp) {
9233 9236 rfs4_sw_exit(&sp->rs_owner->ro_sw);
9234 9237 rfs4_state_rele(sp);
9235 9238 }
9236 9239
9237 9240 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9238 9241 LOCK4res *, resp);
9239 9242 }
9240 9243
9241 9244 /* free function for LOCK/LOCKT */
9242 9245 static void
9243 9246 lock_denied_free(nfs_resop4 *resop)
9244 9247 {
9245 9248 LOCK4denied *dp = NULL;
9246 9249
9247 9250 switch (resop->resop) {
9248 9251 case OP_LOCK:
9249 9252 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9250 9253 dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9251 9254 break;
9252 9255 case OP_LOCKT:
9253 9256 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9254 9257 dp = &resop->nfs_resop4_u.oplockt.denied;
9255 9258 break;
9256 9259 default:
9257 9260 break;
9258 9261 }
9259 9262
9260 9263 if (dp)
9261 9264 kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9262 9265 }
9263 9266
9264 9267 /*ARGSUSED*/
9265 9268 void
9266 9269 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
9267 9270 struct svc_req *req, struct compound_state *cs)
9268 9271 {
9269 9272 LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9270 9273 LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9271 9274 nfsstat4 status;
9272 9275 stateid4 *stateid = &args->lock_stateid;
9273 9276 rfs4_lo_state_t *lsp;
9274 9277
9275 9278 DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9276 9279 LOCKU4args *, args);
9277 9280
9278 9281 if (cs->vp == NULL) {
9279 9282 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9280 9283 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9281 9284 LOCKU4res *, resp);
9282 9285 return;
9283 9286 }
9284 9287
9285 9288 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9286 9289 *cs->statusp = resp->status = status;
9287 9290 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9288 9291 LOCKU4res *, resp);
9289 9292 return;
9290 9293 }
9291 9294
9292 9295 /* Ensure specified filehandle matches */
9293 9296 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9294 9297 rfs4_lo_state_rele(lsp, TRUE);
9295 9298 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9296 9299 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9297 9300 LOCKU4res *, resp);
9298 9301 return;
9299 9302 }
9300 9303
9301 9304 /* hold off other access to lsp while we tinker */
9302 9305 rfs4_sw_enter(&lsp->rls_sw);
9303 9306
9304 9307 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9305 9308 case NFS4_CHECK_STATEID_OKAY:
9306 9309 if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9307 9310 != NFS4_CHKSEQ_OKAY) {
9308 9311 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9309 9312 goto end;
9310 9313 }
9311 9314 break;
9312 9315 case NFS4_CHECK_STATEID_OLD:
9313 9316 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9314 9317 goto end;
9315 9318 case NFS4_CHECK_STATEID_BAD:
9316 9319 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9317 9320 goto end;
9318 9321 case NFS4_CHECK_STATEID_EXPIRED:
9319 9322 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9320 9323 goto end;
9321 9324 case NFS4_CHECK_STATEID_CLOSED:
9322 9325 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9323 9326 goto end;
9324 9327 case NFS4_CHECK_STATEID_REPLAY:
9325 9328 switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9326 9329 case NFS4_CHKSEQ_OKAY:
9327 9330 /*
9328 9331 * This is a replayed stateid; if
9329 9332 * seqid matches the next expected,
9330 9333 * then client is using wrong seqid.
9331 9334 */
9332 9335 case NFS4_CHKSEQ_BAD:
9333 9336 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9334 9337 goto end;
9335 9338 case NFS4_CHKSEQ_REPLAY:
9336 9339 rfs4_update_lease(lsp->rls_locker->rl_client);
9337 9340 *cs->statusp = status = resp->status;
9338 9341 goto end;
9339 9342 }
9340 9343 break;
9341 9344 default:
9342 9345 ASSERT(FALSE);
9343 9346 break;
9344 9347 }
9345 9348
9346 9349 rfs4_update_lock_sequence(lsp);
9347 9350 rfs4_update_lease(lsp->rls_locker->rl_client);
9348 9351
9349 9352 /*
9350 9353 * NFS4 only allows locking on regular files, so
9351 9354 * verify type of object.
9352 9355 */
9353 9356 if (cs->vp->v_type != VREG) {
9354 9357 if (cs->vp->v_type == VDIR)
9355 9358 status = NFS4ERR_ISDIR;
9356 9359 else
9357 9360 status = NFS4ERR_INVAL;
9358 9361 goto out;
9359 9362 }
9360 9363
9361 9364 if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9362 9365 status = NFS4ERR_GRACE;
9363 9366 goto out;
9364 9367 }
9365 9368
9366 9369 status = rfs4_do_lock(lsp, args->locktype,
9367 9370 args->offset, args->length, cs->cr, resop);
9368 9371
9369 9372 out:
9370 9373 *cs->statusp = resp->status = status;
9371 9374
9372 9375 if (status == NFS4_OK)
9373 9376 resp->lock_stateid = lsp->rls_lockid.stateid;
9374 9377
9375 9378 rfs4_update_lock_resp(lsp, resop);
9376 9379
9377 9380 end:
9378 9381 rfs4_sw_exit(&lsp->rls_sw);
9379 9382 rfs4_lo_state_rele(lsp, TRUE);
9380 9383
9381 9384 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9382 9385 LOCKU4res *, resp);
9383 9386 }
9384 9387
9385 9388 /*
9386 9389 * LOCKT is a best effort routine, the client can not be guaranteed that
9387 9390 * the status return is still in effect by the time the reply is received.
9388 9391 * They are numerous race conditions in this routine, but we are not required
9389 9392 * and can not be accurate.
9390 9393 */
9391 9394 /*ARGSUSED*/
9392 9395 void
9393 9396 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9394 9397 struct svc_req *req, struct compound_state *cs)
9395 9398 {
9396 9399 LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9397 9400 LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9398 9401 rfs4_lockowner_t *lo;
9399 9402 rfs4_client_t *cp;
9400 9403 bool_t create = FALSE;
9401 9404 struct flock64 flk;
9402 9405 int error;
9403 9406 int flag = FREAD | FWRITE;
9404 9407 int ltype;
9405 9408 length4 posix_length;
9406 9409 sysid_t sysid;
9407 9410 pid_t pid;
9408 9411
9409 9412 DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9410 9413 LOCKT4args *, args);
9411 9414
9412 9415 if (cs->vp == NULL) {
9413 9416 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9414 9417 goto out;
9415 9418 }
9416 9419
9417 9420 /*
9418 9421 * NFS4 only allows locking on regular files, so
9419 9422 * verify type of object.
9420 9423 */
9421 9424 if (cs->vp->v_type != VREG) {
9422 9425 if (cs->vp->v_type == VDIR)
9423 9426 *cs->statusp = resp->status = NFS4ERR_ISDIR;
9424 9427 else
9425 9428 *cs->statusp = resp->status = NFS4ERR_INVAL;
9426 9429 goto out;
9427 9430 }
9428 9431
9429 9432 /*
9430 9433 * Check out the clientid to ensure the server knows about it
9431 9434 * so that we correctly inform the client of a server reboot.
9432 9435 */
9433 9436 if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9434 9437 == NULL) {
9435 9438 *cs->statusp = resp->status =
9436 9439 rfs4_check_clientid(&args->owner.clientid, 0);
9437 9440 goto out;
9438 9441 }
9439 9442 if (rfs4_lease_expired(cp)) {
9440 9443 rfs4_client_close(cp);
9441 9444 /*
9442 9445 * Protocol doesn't allow returning NFS4ERR_STALE as
9443 9446 * other operations do on this check so STALE_CLIENTID
9444 9447 * is returned instead
9445 9448 */
9446 9449 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9447 9450 goto out;
9448 9451 }
9449 9452
9450 9453 if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9451 9454 *cs->statusp = resp->status = NFS4ERR_GRACE;
9452 9455 rfs4_client_rele(cp);
9453 9456 goto out;
9454 9457 }
9455 9458 rfs4_client_rele(cp);
9456 9459
9457 9460 resp->status = NFS4_OK;
9458 9461
9459 9462 switch (args->locktype) {
9460 9463 case READ_LT:
9461 9464 case READW_LT:
9462 9465 ltype = F_RDLCK;
9463 9466 break;
9464 9467 case WRITE_LT:
9465 9468 case WRITEW_LT:
9466 9469 ltype = F_WRLCK;
9467 9470 break;
9468 9471 }
9469 9472
9470 9473 posix_length = args->length;
9471 9474 /* Check for zero length. To lock to end of file use all ones for V4 */
9472 9475 if (posix_length == 0) {
9473 9476 *cs->statusp = resp->status = NFS4ERR_INVAL;
9474 9477 goto out;
9475 9478 } else if (posix_length == (length4)(~0)) {
9476 9479 posix_length = 0; /* Posix to end of file */
9477 9480 }
9478 9481
9479 9482 /* Find or create a lockowner */
9480 9483 lo = rfs4_findlockowner(&args->owner, &create);
9481 9484
9482 9485 if (lo) {
9483 9486 pid = lo->rl_pid;
9484 9487 if ((resp->status =
9485 9488 rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9486 9489 goto err;
9487 9490 } else {
9488 9491 pid = 0;
9489 9492 sysid = lockt_sysid;
9490 9493 }
9491 9494 retry:
9492 9495 flk.l_type = ltype;
9493 9496 flk.l_whence = 0; /* SEEK_SET */
9494 9497 flk.l_start = args->offset;
9495 9498 flk.l_len = posix_length;
9496 9499 flk.l_sysid = sysid;
9497 9500 flk.l_pid = pid;
9498 9501 flag |= F_REMOTELOCK;
9499 9502
9500 9503 LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9501 9504
9502 9505 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9503 9506 if (flk.l_len < 0 || flk.l_start < 0) {
9504 9507 resp->status = NFS4ERR_INVAL;
9505 9508 goto err;
9506 9509 }
9507 9510 error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9508 9511 NULL, cs->cr, NULL);
9509 9512
9510 9513 /*
9511 9514 * N.B. We map error values to nfsv4 errors. This is differrent
9512 9515 * than puterrno4 routine.
9513 9516 */
9514 9517 switch (error) {
9515 9518 case 0:
9516 9519 if (flk.l_type == F_UNLCK)
9517 9520 resp->status = NFS4_OK;
9518 9521 else {
9519 9522 if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9520 9523 goto retry;
9521 9524 resp->status = NFS4ERR_DENIED;
9522 9525 }
9523 9526 break;
9524 9527 case EOVERFLOW:
9525 9528 resp->status = NFS4ERR_INVAL;
9526 9529 break;
9527 9530 case EINVAL:
9528 9531 resp->status = NFS4ERR_NOTSUPP;
9529 9532 break;
9530 9533 default:
9531 9534 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9532 9535 error);
9533 9536 resp->status = NFS4ERR_SERVERFAULT;
9534 9537 break;
9535 9538 }
9536 9539
9537 9540 err:
9538 9541 if (lo)
9539 9542 rfs4_lockowner_rele(lo);
9540 9543 *cs->statusp = resp->status;
9541 9544 out:
9542 9545 DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9543 9546 LOCKT4res *, resp);
9544 9547 }
9545 9548
9546 9549 int
9547 9550 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9548 9551 {
9549 9552 int err;
9550 9553 int cmd;
9551 9554 vnode_t *vp;
9552 9555 struct shrlock shr;
9553 9556 struct shr_locowner shr_loco;
9554 9557 int fflags = 0;
9555 9558
9556 9559 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9557 9560 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9558 9561
9559 9562 if (sp->rs_closed)
9560 9563 return (NFS4ERR_OLD_STATEID);
9561 9564
9562 9565 vp = sp->rs_finfo->rf_vp;
9563 9566 ASSERT(vp);
9564 9567
9565 9568 shr.s_access = shr.s_deny = 0;
9566 9569
9567 9570 if (access & OPEN4_SHARE_ACCESS_READ) {
9568 9571 fflags |= FREAD;
9569 9572 shr.s_access |= F_RDACC;
9570 9573 }
9571 9574 if (access & OPEN4_SHARE_ACCESS_WRITE) {
9572 9575 fflags |= FWRITE;
9573 9576 shr.s_access |= F_WRACC;
9574 9577 }
9575 9578 ASSERT(shr.s_access);
9576 9579
9577 9580 if (deny & OPEN4_SHARE_DENY_READ)
9578 9581 shr.s_deny |= F_RDDNY;
9579 9582 if (deny & OPEN4_SHARE_DENY_WRITE)
9580 9583 shr.s_deny |= F_WRDNY;
9581 9584
9582 9585 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9583 9586 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9584 9587 shr_loco.sl_pid = shr.s_pid;
9585 9588 shr_loco.sl_id = shr.s_sysid;
9586 9589 shr.s_owner = (caddr_t)&shr_loco;
9587 9590 shr.s_own_len = sizeof (shr_loco);
9588 9591
9589 9592 cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9590 9593
9591 9594 err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9592 9595 if (err != 0) {
9593 9596 if (err == EAGAIN)
9594 9597 err = NFS4ERR_SHARE_DENIED;
9595 9598 else
9596 9599 err = puterrno4(err);
9597 9600 return (err);
9598 9601 }
9599 9602
9600 9603 sp->rs_share_access |= access;
9601 9604 sp->rs_share_deny |= deny;
9602 9605
9603 9606 return (0);
9604 9607 }
9605 9608
9606 9609 int
9607 9610 rfs4_unshare(rfs4_state_t *sp)
9608 9611 {
9609 9612 int err;
9610 9613 struct shrlock shr;
9611 9614 struct shr_locowner shr_loco;
9612 9615
9613 9616 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9614 9617
9615 9618 if (sp->rs_closed || sp->rs_share_access == 0)
9616 9619 return (0);
9617 9620
9618 9621 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9619 9622 ASSERT(sp->rs_finfo->rf_vp);
9620 9623
9621 9624 shr.s_access = shr.s_deny = 0;
9622 9625 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9623 9626 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9624 9627 shr_loco.sl_pid = shr.s_pid;
9625 9628 shr_loco.sl_id = shr.s_sysid;
9626 9629 shr.s_owner = (caddr_t)&shr_loco;
9627 9630 shr.s_own_len = sizeof (shr_loco);
9628 9631
9629 9632 err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9630 9633 NULL);
9631 9634 if (err != 0) {
9632 9635 err = puterrno4(err);
9633 9636 return (err);
9634 9637 }
9635 9638
9636 9639 sp->rs_share_access = 0;
9637 9640 sp->rs_share_deny = 0;
9638 9641
9639 9642 return (0);
9640 9643
9641 9644 }
9642 9645
9643 9646 static int
9644 9647 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9645 9648 {
9646 9649 struct clist *wcl;
9647 9650 count4 count = rok->data_len;
9648 9651 int wlist_len;
9649 9652
9650 9653 wcl = args->wlist;
9651 9654 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9652 9655 return (FALSE);
9653 9656 }
9654 9657 wcl = args->wlist;
9655 9658 rok->wlist_len = wlist_len;
9656 9659 rok->wlist = wcl;
9657 9660 return (TRUE);
9658 9661 }
9659 9662
/*
 * Tunable to disable server referrals.  While this is non-zero,
 * vn_is_nfs_reparse() returns B_FALSE for every vnode.
 */
int rfs4_no_referrals = 0;
9662 9665
9663 9666 /*
9664 9667 * Find an NFS record in reparse point data.
9665 9668 * Returns 0 for success and <0 or an errno value on failure.
9666 9669 */
9667 9670 int
9668 9671 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9669 9672 {
9670 9673 int err;
9671 9674 char *stype, *val;
9672 9675 nvlist_t *nvl;
9673 9676 nvpair_t *curr;
9674 9677
9675 9678 if ((nvl = reparse_init()) == NULL)
9676 9679 return (-1);
9677 9680
9678 9681 if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9679 9682 reparse_free(nvl);
9680 9683 return (err);
9681 9684 }
9682 9685
9683 9686 curr = NULL;
9684 9687 while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9685 9688 if ((stype = nvpair_name(curr)) == NULL) {
9686 9689 reparse_free(nvl);
9687 9690 return (-2);
9688 9691 }
9689 9692 if (strncasecmp(stype, "NFS", 3) == 0)
9690 9693 break;
9691 9694 }
9692 9695
9693 9696 if ((curr == NULL) ||
9694 9697 (nvpair_value_string(curr, &val))) {
9695 9698 reparse_free(nvl);
9696 9699 return (-3);
9697 9700 }
9698 9701 *nvlp = nvl;
9699 9702 *svcp = stype;
9700 9703 *datap = val;
9701 9704 return (0);
9702 9705 }
9703 9706
9704 9707 int
9705 9708 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9706 9709 {
9707 9710 nvlist_t *nvl;
9708 9711 char *s, *d;
9709 9712
9710 9713 if (rfs4_no_referrals != 0)
9711 9714 return (B_FALSE);
9712 9715
9713 9716 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9714 9717 return (B_FALSE);
9715 9718
9716 9719 if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9717 9720 return (B_FALSE);
9718 9721
9719 9722 reparse_free(nvl);
9720 9723
9721 9724 return (B_TRUE);
9722 9725 }
9723 9726
9724 9727 /*
9725 9728 * There is a user-level copy of this routine in ref_subr.c.
9726 9729 * Changes should be kept in sync.
9727 9730 */
9728 9731 static int
9729 9732 nfs4_create_components(char *path, component4 *comp4)
9730 9733 {
9731 9734 int slen, plen, ncomp;
9732 9735 char *ori_path, *nxtc, buf[MAXNAMELEN];
9733 9736
9734 9737 if (path == NULL)
9735 9738 return (0);
9736 9739
9737 9740 plen = strlen(path) + 1; /* include the terminator */
9738 9741 ori_path = path;
9739 9742 ncomp = 0;
9740 9743
9741 9744 /* count number of components in the path */
9742 9745 for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9743 9746 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9744 9747 if ((slen = nxtc - path) == 0) {
9745 9748 path = nxtc + 1;
9746 9749 continue;
9747 9750 }
9748 9751
9749 9752 if (comp4 != NULL) {
9750 9753 bcopy(path, buf, slen);
9751 9754 buf[slen] = '\0';
9752 9755 (void) str_to_utf8(buf, &comp4[ncomp]);
9753 9756 }
9754 9757
9755 9758 ncomp++; /* 1 valid component */
9756 9759 path = nxtc + 1;
9757 9760 }
9758 9761 if (*nxtc == '\0' || *nxtc == '\n')
9759 9762 break;
9760 9763 }
9761 9764
9762 9765 return (ncomp);
9763 9766 }
9764 9767
9765 9768 /*
9766 9769 * There is a user-level copy of this routine in ref_subr.c.
9767 9770 * Changes should be kept in sync.
9768 9771 */
9769 9772 static int
9770 9773 make_pathname4(char *path, pathname4 *pathname)
9771 9774 {
9772 9775 int ncomp;
9773 9776 component4 *comp4;
9774 9777
9775 9778 if (pathname == NULL)
9776 9779 return (0);
9777 9780
9778 9781 if (path == NULL) {
9779 9782 pathname->pathname4_val = NULL;
9780 9783 pathname->pathname4_len = 0;
9781 9784 return (0);
9782 9785 }
9783 9786
9784 9787 /* count number of components to alloc buffer */
9785 9788 if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
9786 9789 pathname->pathname4_val = NULL;
9787 9790 pathname->pathname4_len = 0;
9788 9791 return (0);
9789 9792 }
9790 9793 comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
9791 9794
9792 9795 /* copy components into allocated buffer */
9793 9796 ncomp = nfs4_create_components(path, comp4);
9794 9797
9795 9798 pathname->pathname4_val = comp4;
9796 9799 pathname->pathname4_len = ncomp;
9797 9800
9798 9801 return (ncomp);
9799 9802 }
9800 9803
9801 9804 #define xdr_fs_locations4 xdr_fattr4_fs_locations
9802 9805
9803 9806 fs_locations4 *
9804 9807 fetch_referral(vnode_t *vp, cred_t *cr)
9805 9808 {
9806 9809 nvlist_t *nvl;
9807 9810 char *stype, *sdata;
9808 9811 fs_locations4 *result;
9809 9812 char buf[1024];
9810 9813 size_t bufsize;
9811 9814 XDR xdr;
9812 9815 int err;
9813 9816
9814 9817 /*
9815 9818 * Check attrs to ensure it's a reparse point
9816 9819 */
9817 9820 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9818 9821 return (NULL);
9819 9822
9820 9823 /*
9821 9824 * Look for an NFS record and get the type and data
9822 9825 */
9823 9826 if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
9824 9827 return (NULL);
9825 9828
9826 9829 /*
9827 9830 * With the type and data, upcall to get the referral
9828 9831 */
9829 9832 bufsize = sizeof (buf);
9830 9833 bzero(buf, sizeof (buf));
9831 9834 err = reparse_kderef((const char *)stype, (const char *)sdata,
9832 9835 buf, &bufsize);
9833 9836 reparse_free(nvl);
9834 9837
9835 9838 DTRACE_PROBE4(nfs4serv__func__referral__upcall,
9836 9839 char *, stype, char *, sdata, char *, buf, int, err);
9837 9840 if (err) {
9838 9841 cmn_err(CE_NOTE,
9839 9842 "reparsed daemon not running: unable to get referral (%d)",
9840 9843 err);
9841 9844 return (NULL);
9842 9845 }
9843 9846
9844 9847 /*
9845 9848 * We get an XDR'ed record back from the kderef call
9846 9849 */
9847 9850 xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
9848 9851 result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
9849 9852 err = xdr_fs_locations4(&xdr, result);
9850 9853 XDR_DESTROY(&xdr);
9851 9854 if (err != TRUE) {
9852 9855 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
9853 9856 int, err);
9854 9857 return (NULL);
9855 9858 }
9856 9859
9857 9860 /*
9858 9861 * Look at path to recover fs_root, ignoring the leading '/'
9859 9862 */
9860 9863 (void) make_pathname4(vp->v_path, &result->fs_root);
9861 9864
9862 9865 return (result);
9863 9866 }
9864 9867
9865 9868 char *
9866 9869 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
9867 9870 {
9868 9871 fs_locations4 *fsl;
9869 9872 fs_location4 *fs;
9870 9873 char *server, *path, *symbuf;
9871 9874 static char *prefix = "/net/";
9872 9875 int i, size, npaths;
9873 9876 uint_t len;
9874 9877
9875 9878 /* Get the referral */
9876 9879 if ((fsl = fetch_referral(vp, cr)) == NULL)
9877 9880 return (NULL);
9878 9881
9879 9882 /* Deal with only the first location and first server */
9880 9883 fs = &fsl->locations_val[0];
9881 9884 server = utf8_to_str(&fs->server_val[0], &len, NULL);
9882 9885 if (server == NULL) {
9883 9886 rfs4_free_fs_locations4(fsl);
9884 9887 kmem_free(fsl, sizeof (fs_locations4));
9885 9888 return (NULL);
9886 9889 }
9887 9890
9888 9891 /* Figure out size for "/net/" + host + /path/path/path + NULL */
9889 9892 size = strlen(prefix) + len;
9890 9893 for (i = 0; i < fs->rootpath.pathname4_len; i++)
9891 9894 size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
9892 9895
9893 9896 /* Allocate the symlink buffer and fill it */
9894 9897 symbuf = kmem_zalloc(size, KM_SLEEP);
9895 9898 (void) strcat(symbuf, prefix);
9896 9899 (void) strcat(symbuf, server);
9897 9900 kmem_free(server, len);
9898 9901
9899 9902 npaths = 0;
9900 9903 for (i = 0; i < fs->rootpath.pathname4_len; i++) {
9901 9904 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
9902 9905 if (path == NULL)
9903 9906 continue;
9904 9907 (void) strcat(symbuf, "/");
9905 9908 (void) strcat(symbuf, path);
9906 9909 npaths++;
9907 9910 kmem_free(path, len);
9908 9911 }
9909 9912
9910 9913 rfs4_free_fs_locations4(fsl);
9911 9914 kmem_free(fsl, sizeof (fs_locations4));
9912 9915
9913 9916 if (strsz != NULL)
9914 9917 *strsz = size;
9915 9918 return (symbuf);
9916 9919 }
9917 9920
9918 9921 /*
9919 9922 * Check to see if we have a downrev Solaris client, so that we
9920 9923 * can send it a symlink instead of a referral.
9921 9924 */
9922 9925 int
9923 9926 client_is_downrev(struct svc_req *req)
9924 9927 {
9925 9928 struct sockaddr *ca;
9926 9929 rfs4_clntip_t *ci;
9927 9930 bool_t create = FALSE;
9928 9931 int is_downrev;
9929 9932
9930 9933 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9931 9934 ASSERT(ca);
9932 9935 ci = rfs4_find_clntip(ca, &create);
9933 9936 if (ci == NULL)
9934 9937 return (0);
9935 9938 is_downrev = ci->ri_no_referrals;
9936 9939 rfs4_dbe_rele(ci->ri_dbe);
9937 9940 return (is_downrev);
9938 9941 }
9939 9942
9940 9943 /*
9941 9944 * Do the main work of handling HA-NFSv4 Resource Group failover on
9942 9945 * Sun Cluster.
9943 9946 * We need to detect whether any RG admin paths have been added or removed,
9944 9947 * and adjust resources accordingly.
9945 9948 * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
9946 9949 * order to scale, the list and array of paths need to be held in more
9947 9950 * suitable data structures.
9948 9951 */
9949 9952 static void
9950 9953 hanfsv4_failover(nfs4_srv_t *nsrv4)
9951 9954 {
9952 9955 int i, start_grace, numadded_paths = 0;
9953 9956 char **added_paths = NULL;
9954 9957 rfs4_dss_path_t *dss_path;
9955 9958
9956 9959 /*
9957 9960 * Note: currently, dss_pathlist cannot be NULL, since
9958 9961 * it will always include an entry for NFS4_DSS_VAR_DIR. If we
9959 9962 * make the latter dynamically specified too, the following will
9960 9963 * need to be adjusted.
9961 9964 */
9962 9965
9963 9966 /*
9964 9967 * First, look for removed paths: RGs that have been failed-over
9965 9968 * away from this node.
9966 9969 * Walk the "currently-serving" dss_pathlist and, for each
9967 9970 * path, check if it is on the "passed-in" rfs4_dss_newpaths array
9968 9971 * from nfsd. If not, that RG path has been removed.
9969 9972 *
9970 9973 * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
9971 9974 * any duplicates.
9972 9975 */
9973 9976 dss_path = nsrv4->dss_pathlist;
9974 9977 do {
9975 9978 int found = 0;
9976 9979 char *path = dss_path->path;
9977 9980
9978 9981 /* used only for non-HA so may not be removed */
9979 9982 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
9980 9983 dss_path = dss_path->next;
9981 9984 continue;
9982 9985 }
9983 9986
9984 9987 for (i = 0; i < rfs4_dss_numnewpaths; i++) {
9985 9988 int cmpret;
9986 9989 char *newpath = rfs4_dss_newpaths[i];
9987 9990
9988 9991 /*
9989 9992 * Since nfsd has sorted rfs4_dss_newpaths for us,
9990 9993 * once the return from strcmp is negative we know
9991 9994 * we've passed the point where "path" should be,
9992 9995 * and can stop searching: "path" has been removed.
9993 9996 */
9994 9997 cmpret = strcmp(path, newpath);
9995 9998 if (cmpret < 0)
9996 9999 break;
9997 10000 if (cmpret == 0) {
9998 10001 found = 1;
9999 10002 break;
10000 10003 }
10001 10004 }
10002 10005
10003 10006 if (found == 0) {
10004 10007 unsigned index = dss_path->index;
10005 10008 rfs4_servinst_t *sip = dss_path->sip;
10006 10009 rfs4_dss_path_t *path_next = dss_path->next;
10007 10010
10008 10011 /*
10009 10012 * This path has been removed.
10010 10013 * We must clear out the servinst reference to
10011 10014 * it, since it's now owned by another
10012 10015 * node: we should not attempt to touch it.
10013 10016 */
10014 10017 ASSERT(dss_path == sip->dss_paths[index]);
10015 10018 sip->dss_paths[index] = NULL;
10016 10019
10017 10020 /* remove from "currently-serving" list, and destroy */
10018 10021 remque(dss_path);
10019 10022 /* allow for NUL */
10020 10023 kmem_free(dss_path->path, strlen(dss_path->path) + 1);
10021 10024 kmem_free(dss_path, sizeof (rfs4_dss_path_t));
10022 10025
10023 10026 dss_path = path_next;
10024 10027 } else {
10025 10028 /* path was found; not removed */
10026 10029 dss_path = dss_path->next;
10027 10030 }
10028 10031 } while (dss_path != nsrv4->dss_pathlist);
10029 10032
10030 10033 /*
10031 10034 * Now, look for added paths: RGs that have been failed-over
10032 10035 * to this node.
10033 10036 * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
10034 10037 * for each path, check if it is on the "currently-serving"
10035 10038 * dss_pathlist. If not, that RG path has been added.
10036 10039 *
10037 10040 * Note: we don't do duplicate detection here; nfsd does that for us.
10038 10041 *
10039 10042 * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
10040 10043 * an upper bound for the size needed for added_paths[numadded_paths].
10041 10044 */
10042 10045
10043 10046 /* probably more space than we need, but guaranteed to be enough */
10044 10047 if (rfs4_dss_numnewpaths > 0) {
10045 10048 size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
10046 10049 added_paths = kmem_zalloc(sz, KM_SLEEP);
10047 10050 }
10048 10051
10049 10052 /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
10050 10053 for (i = 0; i < rfs4_dss_numnewpaths; i++) {
10051 10054 int found = 0;
10052 10055 char *newpath = rfs4_dss_newpaths[i];
10053 10056
10054 10057 dss_path = nsrv4->dss_pathlist;
10055 10058 do {
10056 10059 char *path = dss_path->path;
10057 10060
10058 10061 /* used only for non-HA */
10059 10062 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
10060 10063 dss_path = dss_path->next;
10061 10064 continue;
10062 10065 }
10063 10066
10064 10067 if (strncmp(path, newpath, strlen(path)) == 0) {
10065 10068 found = 1;
10066 10069 break;
10067 10070 }
10068 10071
10069 10072 dss_path = dss_path->next;
10070 10073 } while (dss_path != nsrv4->dss_pathlist);
10071 10074
10072 10075 if (found == 0) {
10073 10076 added_paths[numadded_paths] = newpath;
10074 10077 numadded_paths++;
10075 10078 }
10076 10079 }
10077 10080
10078 10081 /* did we find any added paths? */
10079 10082 if (numadded_paths > 0) {
10080 10083
10081 10084 /* create a new server instance, and start its grace period */
10082 10085 start_grace = 1;
10083 10086 /* CSTYLED */
10084 10087 rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths);
10085 10088
10086 10089 /* read in the stable storage state from these paths */
10087 10090 rfs4_dss_readstate(nsrv4, numadded_paths, added_paths);
10088 10091
10089 10092 /*
10090 10093 * Multiple failovers during a grace period will cause
10091 10094 * clients of the same resource group to be partitioned
10092 10095 * into different server instances, with different
10093 10096 * grace periods. Since clients of the same resource
10094 10097 * group must be subject to the same grace period,
10095 10098 * we need to reset all currently active grace periods.
10096 10099 */
10097 10100 rfs4_grace_reset_all(nsrv4);
10098 10101 }
10099 10102
10100 10103 if (rfs4_dss_numnewpaths > 0)
10101 10104 kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
10102 10105 }
|
↓ open down ↓ |
1856 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX