Print this page
Try to remove assumption that zone's root vnode is marked VROOT
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/nfs/nfs4_srv.c
+++ new/usr/src/uts/common/fs/nfs/nfs4_srv.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25
26 26 /*
27 27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
28 28 * All Rights Reserved
29 29 */
30 30
31 31 /*
32 32 * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
33 33 * Copyright 2019 Nexenta Systems, Inc.
34 34 * Copyright 2019 Nexenta by DDN, Inc.
35 35 */
36 36
37 37 #include <sys/param.h>
38 38 #include <sys/types.h>
39 39 #include <sys/systm.h>
40 40 #include <sys/cred.h>
41 41 #include <sys/buf.h>
42 42 #include <sys/vfs.h>
43 43 #include <sys/vfs_opreg.h>
44 44 #include <sys/vnode.h>
45 45 #include <sys/uio.h>
46 46 #include <sys/errno.h>
47 47 #include <sys/sysmacros.h>
48 48 #include <sys/statvfs.h>
49 49 #include <sys/kmem.h>
50 50 #include <sys/dirent.h>
51 51 #include <sys/cmn_err.h>
52 52 #include <sys/debug.h>
53 53 #include <sys/systeminfo.h>
54 54 #include <sys/flock.h>
55 55 #include <sys/pathname.h>
56 56 #include <sys/nbmlock.h>
57 57 #include <sys/share.h>
58 58 #include <sys/atomic.h>
59 59 #include <sys/policy.h>
60 60 #include <sys/fem.h>
61 61 #include <sys/sdt.h>
62 62 #include <sys/ddi.h>
63 63 #include <sys/zone.h>
64 64
65 65 #include <fs/fs_reparse.h>
66 66
67 67 #include <rpc/types.h>
68 68 #include <rpc/auth.h>
69 69 #include <rpc/rpcsec_gss.h>
70 70 #include <rpc/svc.h>
71 71
72 72 #include <nfs/nfs.h>
73 73 #include <nfs/nfssys.h>
74 74 #include <nfs/export.h>
75 75 #include <nfs/nfs_cmd.h>
76 76 #include <nfs/lm.h>
77 77 #include <nfs/nfs4.h>
78 78 #include <nfs/nfs4_drc.h>
79 79
80 80 #include <sys/strsubr.h>
81 81 #include <sys/strsun.h>
82 82
83 83 #include <inet/common.h>
84 84 #include <inet/ip.h>
85 85 #include <inet/ip6.h>
86 86
87 87 #include <sys/tsol/label.h>
88 88 #include <sys/tsol/tndb.h>
89 89
90 90 #define RFS4_MAXLOCK_TRIES 4 /* Try to get the lock this many times */
91 91 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
92 92 #define RFS4_LOCK_DELAY 10 /* Milliseconds */
93 93 static clock_t rfs4_lock_delay = RFS4_LOCK_DELAY;
94 94 extern struct svc_ops rdma_svc_ops;
95 95 extern int nfs_loaned_buffers;
96 96 /* End of Tunables */
97 97
98 98 static int rdma_setup_read_data4(READ4args *, READ4res *);
99 99
100 100 /*
101 101 * Used to bump the stateid4.seqid value and show changes in the stateid
102 102 */
103 103 #define next_stateid(sp) (++(sp)->bits.chgseq)
104 104
105 105 /*
106 106 * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
107 107 * This is used to return NFS4ERR_TOOSMALL when clients specify
108 108 * maxcount that isn't large enough to hold the smallest possible
109 109 * XDR encoded dirent.
110 110 *
111 111 * sizeof cookie (8 bytes) +
112 112 * sizeof name_len (4 bytes) +
113 113 * sizeof smallest (padded) name (4 bytes) +
114 114 * sizeof bitmap4_len (12 bytes) + NOTE: we always encode len=2 bm4
115 115 * sizeof attrlist4_len (4 bytes) +
116 116 * sizeof next boolean (4 bytes)
117 117 *
118 118 * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
119 119 * the smallest possible entry4 (assumes no attrs requested).
120 120 * sizeof nfsstat4 (4 bytes) +
121 121 * sizeof verifier4 (8 bytes) +
122 122 * sizeof entry4list bool (4 bytes) +
123 123 * sizeof entry4 (36 bytes) +
124 124 * sizeof eof bool (4 bytes)
125 125 *
126 126 * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
127 127 * VOP_READDIR. Its value is the size of the maximum possible dirent
128 128 * for solaris. The DIRENT64_RECLEN macro returns the size of dirent
129 129 * required for a given name length. MAXNAMELEN is the maximum
130 130 * filename length allowed in Solaris. The first two DIRENT64_RECLEN()
131 131 * macros are to allow for . and .. entries -- just a minor tweak to try
132 132 * and guarantee that buffer we give to VOP_READDIR will be large enough
133 133 * to hold ., .., and the largest possible solaris dirent64.
134 134 */
135 135 #define RFS4_MINLEN_ENTRY4 36
136 136 #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
137 137 #define RFS4_MINLEN_RDDIR_BUF \
138 138 (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
139 139
140 140 /*
141 141 * It would be better to pad to 4 bytes since that's what XDR would do,
142 142 * but the dirents UFS gives us are already padded to 8, so just take
143 143 * what we're given. Dircount is only a hint anyway. Currently the
144 144 * solaris kernel is ASCII only, so there's no point in calling the
145 145 * UTF8 functions.
146 146 *
147 147 * dirent64: name padded to provide 8 byte struct alignment
148 148 * d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
149 149 *
150 150 * cookie: uint64_t + utf8namelen: uint_t + utf8name padded to 8 bytes
151 151 *
152 152 */
153 153 #define DIRENT64_TO_DIRCOUNT(dp) \
154 154 (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
155 155
156 156 zone_key_t rfs4_zone_key;
157 157
158 158 static sysid_t lockt_sysid; /* dummy sysid for all LOCKT calls */
159 159
160 160 u_longlong_t nfs4_srv_caller_id;
161 161 uint_t nfs4_srv_vkey = 0;
162 162
163 163 void rfs4_init_compound_state(struct compound_state *);
164 164
165 165 static void nullfree(caddr_t);
166 166 static void rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
167 167 struct compound_state *);
168 168 static void rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
169 169 struct compound_state *);
170 170 static void rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
171 171 struct compound_state *);
172 172 static void rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
173 173 struct compound_state *);
174 174 static void rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
175 175 struct compound_state *);
176 176 static void rfs4_op_create_free(nfs_resop4 *resop);
177 177 static void rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
178 178 struct svc_req *, struct compound_state *);
179 179 static void rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
180 180 struct svc_req *, struct compound_state *);
181 181 static void rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
182 182 struct compound_state *);
183 183 static void rfs4_op_getattr_free(nfs_resop4 *);
184 184 static void rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
185 185 struct compound_state *);
186 186 static void rfs4_op_getfh_free(nfs_resop4 *);
187 187 static void rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
188 188 struct compound_state *);
189 189 static void rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
190 190 struct compound_state *);
191 191 static void rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
192 192 struct compound_state *);
193 193 static void lock_denied_free(nfs_resop4 *);
194 194 static void rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
195 195 struct compound_state *);
196 196 static void rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
197 197 struct compound_state *);
198 198 static void rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
199 199 struct compound_state *);
200 200 static void rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
201 201 struct compound_state *);
202 202 static void rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
203 203 struct svc_req *req, struct compound_state *cs);
204 204 static void rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
205 205 struct compound_state *);
206 206 static void rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
207 207 struct compound_state *);
208 208 static void rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
209 209 struct svc_req *, struct compound_state *);
210 210 static void rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
211 211 struct svc_req *, struct compound_state *);
212 212 static void rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
213 213 struct compound_state *);
214 214 static void rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
215 215 struct compound_state *);
216 216 static void rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
217 217 struct compound_state *);
218 218 static void rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
219 219 struct compound_state *);
220 220 static void rfs4_op_read_free(nfs_resop4 *);
221 221 static void rfs4_op_readdir_free(nfs_resop4 *resop);
222 222 static void rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
223 223 struct compound_state *);
224 224 static void rfs4_op_readlink_free(nfs_resop4 *);
225 225 static void rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
226 226 struct svc_req *, struct compound_state *);
227 227 static void rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
228 228 struct compound_state *);
229 229 static void rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
230 230 struct compound_state *);
231 231 static void rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
232 232 struct compound_state *);
233 233 static void rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
234 234 struct compound_state *);
235 235 static void rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
236 236 struct compound_state *);
237 237 static void rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
238 238 struct compound_state *);
239 239 static void rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
240 240 struct compound_state *);
241 241 static void rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
242 242 struct compound_state *);
243 243 static void rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
244 244 struct svc_req *, struct compound_state *);
245 245 static void rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
246 246 struct svc_req *req, struct compound_state *);
247 247 static void rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
248 248 struct compound_state *);
249 249 static void rfs4_op_secinfo_free(nfs_resop4 *);
250 250
251 251 static nfsstat4 check_open_access(uint32_t, struct compound_state *,
252 252 struct svc_req *);
253 253 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
254 254 void rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *);
255 255
256 256
257 257 /*
258 258 * translation table for attrs
259 259 */
260 260 struct nfs4_ntov_table {
261 261 union nfs4_attr_u *na;
262 262 uint8_t amap[NFS4_MAXNUM_ATTRS];
263 263 int attrcnt;
264 264 bool_t vfsstat;
265 265 };
266 266
267 267 static void nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
268 268 static void nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
269 269 struct nfs4_svgetit_arg *sargp);
270 270
271 271 static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
272 272 struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
273 273 struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
274 274
275 275 static void hanfsv4_failover(nfs4_srv_t *);
276 276
277 277 fem_t *deleg_rdops;
278 278 fem_t *deleg_wrops;
279 279
280 280 /*
281 281 * NFS4 op dispatch table
282 282 */
283 283
284 284 struct rfsv4disp {
285 285 void (*dis_proc)(); /* proc to call */
286 286 void (*dis_resfree)(); /* frees space allocated by proc */
287 287 int dis_flags; /* RPC_IDEMPOTENT, etc... */
288 288 };
289 289
290 290 static struct rfsv4disp rfsv4disptab[] = {
291 291 /*
292 292 * NFS VERSION 4
293 293 */
294 294
295 295 /* RFS_NULL = 0 */
296 296 {rfs4_op_illegal, nullfree, 0},
297 297
298 298 /* UNUSED = 1 */
299 299 {rfs4_op_illegal, nullfree, 0},
300 300
301 301 /* UNUSED = 2 */
302 302 {rfs4_op_illegal, nullfree, 0},
303 303
304 304 /* OP_ACCESS = 3 */
305 305 {rfs4_op_access, nullfree, RPC_IDEMPOTENT},
306 306
307 307 /* OP_CLOSE = 4 */
308 308 {rfs4_op_close, nullfree, 0},
309 309
310 310 /* OP_COMMIT = 5 */
311 311 {rfs4_op_commit, nullfree, RPC_IDEMPOTENT},
312 312
313 313 /* OP_CREATE = 6 */
314 314 {rfs4_op_create, nullfree, 0},
315 315
316 316 /* OP_DELEGPURGE = 7 */
317 317 {rfs4_op_delegpurge, nullfree, 0},
318 318
319 319 /* OP_DELEGRETURN = 8 */
320 320 {rfs4_op_delegreturn, nullfree, 0},
321 321
322 322 /* OP_GETATTR = 9 */
323 323 {rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},
324 324
325 325 /* OP_GETFH = 10 */
326 326 {rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},
327 327
328 328 /* OP_LINK = 11 */
329 329 {rfs4_op_link, nullfree, 0},
330 330
331 331 /* OP_LOCK = 12 */
332 332 {rfs4_op_lock, lock_denied_free, 0},
333 333
334 334 /* OP_LOCKT = 13 */
335 335 {rfs4_op_lockt, lock_denied_free, 0},
336 336
337 337 /* OP_LOCKU = 14 */
338 338 {rfs4_op_locku, nullfree, 0},
339 339
340 340 /* OP_LOOKUP = 15 */
341 341 {rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
342 342
343 343 /* OP_LOOKUPP = 16 */
344 344 {rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
345 345
346 346 /* OP_NVERIFY = 17 */
347 347 {rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},
348 348
349 349 /* OP_OPEN = 18 */
350 350 {rfs4_op_open, rfs4_free_reply, 0},
351 351
352 352 /* OP_OPENATTR = 19 */
353 353 {rfs4_op_openattr, nullfree, 0},
354 354
355 355 /* OP_OPEN_CONFIRM = 20 */
356 356 {rfs4_op_open_confirm, nullfree, 0},
357 357
358 358 /* OP_OPEN_DOWNGRADE = 21 */
359 359 {rfs4_op_open_downgrade, nullfree, 0},
360 360
361 361 /* OP_PUTFH = 22 */
362 362 {rfs4_op_putfh, nullfree, RPC_ALL},
363 363
364 364 /* OP_PUTPUBFH = 23 */
365 365 {rfs4_op_putpubfh, nullfree, RPC_ALL},
366 366
367 367 /* OP_PUTROOTFH = 24 */
368 368 {rfs4_op_putrootfh, nullfree, RPC_ALL},
369 369
370 370 /* OP_READ = 25 */
371 371 {rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},
372 372
373 373 /* OP_READDIR = 26 */
374 374 {rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},
375 375
376 376 /* OP_READLINK = 27 */
377 377 {rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},
378 378
379 379 /* OP_REMOVE = 28 */
380 380 {rfs4_op_remove, nullfree, 0},
381 381
382 382 /* OP_RENAME = 29 */
383 383 {rfs4_op_rename, nullfree, 0},
384 384
385 385 /* OP_RENEW = 30 */
386 386 {rfs4_op_renew, nullfree, 0},
387 387
388 388 /* OP_RESTOREFH = 31 */
389 389 {rfs4_op_restorefh, nullfree, RPC_ALL},
390 390
391 391 /* OP_SAVEFH = 32 */
392 392 {rfs4_op_savefh, nullfree, RPC_ALL},
393 393
394 394 /* OP_SECINFO = 33 */
395 395 {rfs4_op_secinfo, rfs4_op_secinfo_free, 0},
396 396
397 397 /* OP_SETATTR = 34 */
398 398 {rfs4_op_setattr, nullfree, 0},
399 399
400 400 /* OP_SETCLIENTID = 35 */
401 401 {rfs4_op_setclientid, nullfree, 0},
402 402
403 403 /* OP_SETCLIENTID_CONFIRM = 36 */
404 404 {rfs4_op_setclientid_confirm, nullfree, 0},
405 405
406 406 /* OP_VERIFY = 37 */
407 407 {rfs4_op_verify, nullfree, RPC_IDEMPOTENT},
408 408
409 409 /* OP_WRITE = 38 */
410 410 {rfs4_op_write, nullfree, 0},
411 411
412 412 /* OP_RELEASE_LOCKOWNER = 39 */
413 413 {rfs4_op_release_lockowner, nullfree, 0},
414 414 };
415 415
416 416 static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
417 417
418 418 #define OP_ILLEGAL_IDX (rfsv4disp_cnt)
419 419
420 420 #ifdef DEBUG
421 421
422 422 int rfs4_fillone_debug = 0;
423 423 int rfs4_no_stub_access = 1;
424 424 int rfs4_rddir_debug = 0;
425 425
/*
 * Printable handler names for debug output, one entry per operation
 * number 0..39 followed by a final entry for illegal operations.
 * Each string mirrors the name of the corresponding rfs4_op_*() handler.
 */
static char *rfs4_op_string[] = {
	"rfs4_op_null",
	"rfs4_op_1 unused",
	"rfs4_op_2 unused",
	"rfs4_op_access",
	"rfs4_op_close",
	"rfs4_op_commit",
	"rfs4_op_create",
	"rfs4_op_delegpurge",
	"rfs4_op_delegreturn",
	"rfs4_op_getattr",
	"rfs4_op_getfh",
	"rfs4_op_link",
	"rfs4_op_lock",
	"rfs4_op_lockt",
	"rfs4_op_locku",
	"rfs4_op_lookup",
	"rfs4_op_lookupp",
	"rfs4_op_nverify",
	"rfs4_op_open",
	"rfs4_op_openattr",
	"rfs4_op_open_confirm",
	"rfs4_op_open_downgrade",
	"rfs4_op_putfh",
	"rfs4_op_putpubfh",
	"rfs4_op_putrootfh",
	"rfs4_op_read",
	"rfs4_op_readdir",
	"rfs4_op_readlink",
	"rfs4_op_remove",
	"rfs4_op_rename",
	"rfs4_op_renew",
	"rfs4_op_restorefh",
	"rfs4_op_savefh",
	"rfs4_op_secinfo",
	"rfs4_op_setattr",
	"rfs4_op_setclientid",
	/* was "rfs4_op_setclient_confirm"; match the handler's real name */
	"rfs4_op_setclientid_confirm",
	"rfs4_op_verify",
	"rfs4_op_write",
	"rfs4_op_release_lockowner",
	"rfs4_op_illegal"
};
469 469 #endif
470 470
471 471 void rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *);
472 472
473 473 extern size_t strlcpy(char *dst, const char *src, size_t dstsize);
474 474
475 475 extern void rfs4_free_fs_locations4(fs_locations4 *);
476 476
477 477 #ifdef nextdp
478 478 #undef nextdp
479 479 #endif
480 480 #define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
481 481
482 482 static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
483 483 VOPNAME_OPEN, { .femop_open = deleg_rd_open },
484 484 VOPNAME_WRITE, { .femop_write = deleg_rd_write },
485 485 VOPNAME_SETATTR, { .femop_setattr = deleg_rd_setattr },
486 486 VOPNAME_RWLOCK, { .femop_rwlock = deleg_rd_rwlock },
487 487 VOPNAME_SPACE, { .femop_space = deleg_rd_space },
488 488 VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_rd_setsecattr },
489 489 VOPNAME_VNEVENT, { .femop_vnevent = deleg_rd_vnevent },
490 490 NULL, NULL
491 491 };
492 492 static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
493 493 VOPNAME_OPEN, { .femop_open = deleg_wr_open },
494 494 VOPNAME_READ, { .femop_read = deleg_wr_read },
495 495 VOPNAME_WRITE, { .femop_write = deleg_wr_write },
496 496 VOPNAME_SETATTR, { .femop_setattr = deleg_wr_setattr },
497 497 VOPNAME_RWLOCK, { .femop_rwlock = deleg_wr_rwlock },
498 498 VOPNAME_SPACE, { .femop_space = deleg_wr_space },
499 499 VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_wr_setsecattr },
500 500 VOPNAME_VNEVENT, { .femop_vnevent = deleg_wr_vnevent },
501 501 NULL, NULL
502 502 };
503 503
504 504 /* ARGSUSED */
505 505 static void *
506 506 rfs4_zone_init(zoneid_t zoneid)
507 507 {
508 508 nfs4_srv_t *nsrv4;
509 509 timespec32_t verf;
510 510
511 511 nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP);
512 512
513 513 /*
514 514 * The following algorithm attempts to find a unique verifier
515 515 * to be used as the write verifier returned from the server
516 516 * to the client. It is important that this verifier change
517 517 * whenever the server reboots. Of secondary importance, it
518 518 * is important for the verifier to be unique between two
519 519 * different servers.
520 520 *
521 521 * Thus, an attempt is made to use the system hostid and the
522 522 * current time in seconds when the nfssrv kernel module is
523 523 * loaded. It is assumed that an NFS server will not be able
524 524 * to boot and then to reboot in less than a second. If the
525 525 * hostid has not been set, then the current high resolution
526 526 * time is used. This will ensure different verifiers each
527 527 * time the server reboots and minimize the chances that two
528 528 * different servers will have the same verifier.
529 529 * XXX - this is broken on LP64 kernels.
530 530 */
531 531 verf.tv_sec = (time_t)zone_get_hostid(NULL);
532 532 if (verf.tv_sec != 0) {
533 533 verf.tv_nsec = gethrestime_sec();
534 534 } else {
535 535 timespec_t tverf;
536 536
537 537 gethrestime(&tverf);
538 538 verf.tv_sec = (time_t)tverf.tv_sec;
539 539 verf.tv_nsec = tverf.tv_nsec;
540 540 }
541 541 nsrv4->write4verf = *(uint64_t *)&verf;
542 542
543 543 /* Used to manage create/destroy of server state */
544 544 nsrv4->nfs4_server_state = NULL;
545 545 nsrv4->nfs4_cur_servinst = NULL;
546 546 nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE;
547 547 mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL);
548 548 mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL);
549 549 mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL);
550 550 rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL);
551 551
552 552 return (nsrv4);
553 553 }
554 554
555 555 /* ARGSUSED */
556 556 static void
557 557 rfs4_zone_fini(zoneid_t zoneid, void *data)
558 558 {
559 559 nfs4_srv_t *nsrv4 = data;
560 560
561 561 mutex_destroy(&nsrv4->deleg_lock);
562 562 mutex_destroy(&nsrv4->state_lock);
563 563 mutex_destroy(&nsrv4->servinst_lock);
564 564 rw_destroy(&nsrv4->deleg_policy_lock);
565 565
566 566 kmem_free(nsrv4, sizeof (*nsrv4));
567 567 }
568 568
569 569 void
570 570 rfs4_srvrinit(void)
571 571 {
572 572 extern void rfs4_attr_init();
573 573
574 574 zone_key_create(&rfs4_zone_key, rfs4_zone_init, NULL, rfs4_zone_fini);
575 575
576 576 rfs4_attr_init();
577 577
578 578
579 579 if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) {
580 580 rfs4_disable_delegation();
581 581 } else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
582 582 &deleg_wrops) != 0) {
583 583 rfs4_disable_delegation();
584 584 fem_free(deleg_rdops);
585 585 }
586 586
587 587 nfs4_srv_caller_id = fs_new_caller_id();
588 588 lockt_sysid = lm_alloc_sysidt();
589 589 vsd_create(&nfs4_srv_vkey, NULL);
590 590 rfs4_state_g_init();
591 591 }
592 592
593 593 void
594 594 rfs4_srvrfini(void)
595 595 {
596 596 if (lockt_sysid != LM_NOSYSID) {
597 597 lm_free_sysidt(lockt_sysid);
598 598 lockt_sysid = LM_NOSYSID;
599 599 }
600 600
601 601 rfs4_state_g_fini();
602 602
603 603 fem_free(deleg_rdops);
604 604 fem_free(deleg_wrops);
605 605
606 606 (void) zone_key_delete(rfs4_zone_key);
607 607 }
608 608
609 609 void
610 610 rfs4_do_server_start(int server_upordown,
611 611 int srv_delegation, int cluster_booted)
612 612 {
613 613 nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
614 614
615 615 /* Is this a warm start? */
616 616 if (server_upordown == NFS_SERVER_QUIESCED) {
617 617 cmn_err(CE_NOTE, "nfs4_srv: "
618 618 "server was previously quiesced; "
619 619 "existing NFSv4 state will be re-used");
620 620
621 621 /*
622 622 * HA-NFSv4: this is also the signal
623 623 * that a Resource Group failover has
624 624 * occurred.
625 625 */
626 626 if (cluster_booted)
627 627 hanfsv4_failover(nsrv4);
628 628 } else {
629 629 /* Cold start */
630 630 nsrv4->rfs4_start_time = 0;
631 631 rfs4_state_zone_init(nsrv4);
632 632 nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max,
633 633 nfs4_drc_hash);
634 634
635 635 /*
636 636 * The nfsd service was started with the -s option
637 637 * we need to pull in any state from the paths indicated.
638 638 */
639 639 if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
640 640 /* read in the stable storage state from these paths */
641 641 rfs4_dss_readstate(nsrv4, rfs4_dss_numnewpaths,
642 642 rfs4_dss_newpaths);
643 643 }
644 644 }
645 645
646 646 /* Check if delegation is to be enabled */
647 647 if (srv_delegation != FALSE)
648 648 rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE);
649 649 }
650 650
651 651 void
652 652 rfs4_init_compound_state(struct compound_state *cs)
653 653 {
654 654 bzero(cs, sizeof (*cs));
655 655 cs->cont = TRUE;
656 656 cs->access = CS_ACCESS_DENIED;
657 657 cs->deleg = FALSE;
658 658 cs->mandlock = FALSE;
659 659 cs->fh.nfs_fh4_val = cs->fhbuf;
660 660 }
661 661
662 662 void
663 663 rfs4_grace_start(rfs4_servinst_t *sip)
664 664 {
665 665 rw_enter(&sip->rwlock, RW_WRITER);
666 666 sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
667 667 sip->grace_period = rfs4_grace_period;
668 668 rw_exit(&sip->rwlock);
669 669 }
670 670
671 671 /*
672 672 * returns true if the instance's grace period has never been started
673 673 */
674 674 int
675 675 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
676 676 {
677 677 time_t start_time;
678 678
679 679 rw_enter(&sip->rwlock, RW_READER);
680 680 start_time = sip->start_time;
681 681 rw_exit(&sip->rwlock);
682 682
683 683 return (start_time == 0);
684 684 }
685 685
686 686 /*
687 687 * Indicates if server instance is within the
688 688 * grace period.
689 689 */
690 690 int
691 691 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
692 692 {
693 693 time_t grace_expiry;
694 694
695 695 rw_enter(&sip->rwlock, RW_READER);
696 696 grace_expiry = sip->start_time + sip->grace_period;
697 697 rw_exit(&sip->rwlock);
698 698
699 699 return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
700 700 }
701 701
702 702 int
703 703 rfs4_clnt_in_grace(rfs4_client_t *cp)
704 704 {
705 705 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
706 706
707 707 return (rfs4_servinst_in_grace(cp->rc_server_instance));
708 708 }
709 709
710 710 /*
711 711 * reset all currently active grace periods
712 712 */
713 713 void
714 714 rfs4_grace_reset_all(nfs4_srv_t *nsrv4)
715 715 {
716 716 rfs4_servinst_t *sip;
717 717
718 718 mutex_enter(&nsrv4->servinst_lock);
719 719 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
720 720 if (rfs4_servinst_in_grace(sip))
721 721 rfs4_grace_start(sip);
722 722 mutex_exit(&nsrv4->servinst_lock);
723 723 }
724 724
725 725 /*
726 726 * start any new instances' grace periods
727 727 */
728 728 void
729 729 rfs4_grace_start_new(nfs4_srv_t *nsrv4)
730 730 {
731 731 rfs4_servinst_t *sip;
732 732
733 733 mutex_enter(&nsrv4->servinst_lock);
734 734 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
735 735 if (rfs4_servinst_grace_new(sip))
736 736 rfs4_grace_start(sip);
737 737 mutex_exit(&nsrv4->servinst_lock);
738 738 }
739 739
740 740 static rfs4_dss_path_t *
741 741 rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip,
742 742 char *path, unsigned index)
743 743 {
744 744 size_t len;
745 745 rfs4_dss_path_t *dss_path;
746 746
747 747 dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
748 748
749 749 /*
750 750 * Take a copy of the string, since the original may be overwritten.
751 751 * Sadly, no strdup() in the kernel.
752 752 */
753 753 /* allow for NUL */
754 754 len = strlen(path) + 1;
755 755 dss_path->path = kmem_alloc(len, KM_SLEEP);
756 756 (void) strlcpy(dss_path->path, path, len);
757 757
758 758 /* associate with servinst */
759 759 dss_path->sip = sip;
760 760 dss_path->index = index;
761 761
762 762 /*
763 763 * Add to list of served paths.
764 764 * No locking required, as we're only ever called at startup.
765 765 */
766 766 if (nsrv4->dss_pathlist == NULL) {
767 767 /* this is the first dss_path_t */
768 768
769 769 /* needed for insque/remque */
770 770 dss_path->next = dss_path->prev = dss_path;
771 771
772 772 nsrv4->dss_pathlist = dss_path;
773 773 } else {
774 774 insque(dss_path, nsrv4->dss_pathlist);
775 775 }
776 776
777 777 return (dss_path);
778 778 }
779 779
780 780 /*
781 781 * Create a new server instance, and make it the currently active instance.
782 782 * Note that starting the grace period too early will reduce the clients'
783 783 * recovery window.
784 784 */
785 785 void
786 786 rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace,
787 787 int dss_npaths, char **dss_paths)
788 788 {
789 789 unsigned i;
790 790 rfs4_servinst_t *sip;
791 791 rfs4_oldstate_t *oldstate;
792 792
793 793 sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
794 794 rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
795 795
796 796 sip->start_time = (time_t)0;
797 797 sip->grace_period = (time_t)0;
798 798 sip->next = NULL;
799 799 sip->prev = NULL;
800 800
801 801 rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
802 802 /*
803 803 * This initial dummy entry is required to setup for insque/remque.
804 804 * It must be skipped over whenever the list is traversed.
805 805 */
806 806 oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
807 807 /* insque/remque require initial list entry to be self-terminated */
808 808 oldstate->next = oldstate;
809 809 oldstate->prev = oldstate;
810 810 sip->oldstate = oldstate;
811 811
812 812
813 813 sip->dss_npaths = dss_npaths;
814 814 sip->dss_paths = kmem_alloc(dss_npaths *
815 815 sizeof (rfs4_dss_path_t *), KM_SLEEP);
816 816
817 817 for (i = 0; i < dss_npaths; i++) {
818 818 /* CSTYLED */
819 819 sip->dss_paths[i] = rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
820 820 }
821 821
822 822 mutex_enter(&nsrv4->servinst_lock);
823 823 if (nsrv4->nfs4_cur_servinst != NULL) {
824 824 /* add to linked list */
825 825 sip->prev = nsrv4->nfs4_cur_servinst;
826 826 nsrv4->nfs4_cur_servinst->next = sip;
827 827 }
828 828 if (start_grace)
829 829 rfs4_grace_start(sip);
830 830 /* make the new instance "current" */
831 831 nsrv4->nfs4_cur_servinst = sip;
832 832
833 833 mutex_exit(&nsrv4->servinst_lock);
834 834 }
835 835
836 836 /*
837 837 * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
838 838 * all instances directly.
839 839 */
840 840 void
841 841 rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4)
842 842 {
843 843 rfs4_servinst_t *sip, *prev, *current;
844 844 #ifdef DEBUG
845 845 int n = 0;
846 846 #endif
847 847
848 848 mutex_enter(&nsrv4->servinst_lock);
849 849 ASSERT(nsrv4->nfs4_cur_servinst != NULL);
850 850 current = nsrv4->nfs4_cur_servinst;
851 851 nsrv4->nfs4_cur_servinst = NULL;
852 852 for (sip = current; sip != NULL; sip = prev) {
853 853 prev = sip->prev;
854 854 rw_destroy(&sip->rwlock);
855 855 if (sip->oldstate)
856 856 kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
857 857 if (sip->dss_paths)
858 858 kmem_free(sip->dss_paths,
859 859 sip->dss_npaths * sizeof (rfs4_dss_path_t *));
860 860 kmem_free(sip, sizeof (rfs4_servinst_t));
861 861 #ifdef DEBUG
862 862 n++;
863 863 #endif
864 864 }
865 865 mutex_exit(&nsrv4->servinst_lock);
866 866 }
867 867
868 868 /*
869 869 * Assign the current server instance to a client_t.
870 870 * Should be called with cp->rc_dbe held.
871 871 */
872 872 void
873 873 rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp,
874 874 rfs4_servinst_t *sip)
875 875 {
876 876 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
877 877
878 878 /*
879 879 * The lock ensures that if the current instance is in the process
880 880 * of changing, we will see the new one.
881 881 */
882 882 mutex_enter(&nsrv4->servinst_lock);
883 883 cp->rc_server_instance = sip;
884 884 mutex_exit(&nsrv4->servinst_lock);
885 885 }
886 886
887 887 rfs4_servinst_t *
888 888 rfs4_servinst(rfs4_client_t *cp)
889 889 {
890 890 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
891 891
892 892 return (cp->rc_server_instance);
893 893 }
894 894
895 895 /* ARGSUSED */
896 896 static void
897 897 nullfree(caddr_t resop)
898 898 {
899 899 }
900 900
901 901 /*
902 902 * This is a fall-through for invalid or not implemented (yet) ops
903 903 */
904 904 /* ARGSUSED */
905 905 static void
906 906 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
907 907 struct compound_state *cs)
908 908 {
909 909 *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
910 910 }
911 911
912 912 /*
913 913 * Check if the security flavor, nfsnum, is in the flavor_list.
914 914 */
915 915 bool_t
916 916 in_flavor_list(int nfsnum, int *flavor_list, int count)
917 917 {
918 918 int i;
919 919
920 920 for (i = 0; i < count; i++) {
921 921 if (nfsnum == flavor_list[i])
922 922 return (TRUE);
923 923 }
924 924 return (FALSE);
925 925 }
926 926
/*
 * Used by rfs4_op_secinfo to get the security information from the
 * export structure associated with the component.
 *
 *	cs	compound state; cs->vp is the directory in which the lookup
 *		starts and cs->exi is the export it was reached through
 *	nm	NUL-terminated component name to look up; ".." is handled
 *		specially
 *	resp	on success, SECINFO4resok_len and SECINFO4resok_val are set
 *		to a kmem_alloc()ed array of secinfo4 entries (released by
 *		rfs4_op_secinfo_free())
 *
 * Returns NFS4_OK on success, otherwise a status produced by puterrno4().
 * The vnode returned by VOP_LOOKUP() is released on every return path
 * that follows a successful lookup.  The export table lock
 * (ne->exported_lock) is asserted to be held when the result is built.
 */
/* ARGSUSED */
static nfsstat4
do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
{
	int error, different_export = 0;
	vnode_t *dvp, *vp;
	struct exportinfo *exi = NULL;
	fid_t fid;
	uint_t count, i;
	secinfo4 *resok_val;
	struct secinfo *secp;
	seconfig_t *si;
	bool_t did_traverse = FALSE;
	int dotdot, walk;
	nfs_export_t *ne = nfs_get_export();

	dvp = cs->vp;
	dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');

	/*
	 * If dotdotting, then need to check whether it's above the
	 * root of a filesystem, or above an export point.
	 */
	if (dotdot) {

		/*
		 * If dotdotting at the root of a filesystem, then
		 * need to traverse back to the mounted-on filesystem
		 * and do the dotdot lookup there.
		 *
		 * The zone's root vnode is tested explicitly as well,
		 * rather than relying on it carrying VROOT.
		 */
		if ((cs->vp->v_flag & VROOT) || VN_IS_CURZONEROOT(cs->vp)) {

			/*
			 * If at the system root, then can
			 * go up no further.
			 */
			if (VN_CMP(dvp, ZONE_ROOTVP()))
				return (puterrno4(ENOENT));

			/*
			 * Traverse back to the mounted-on filesystem
			 *
			 * NOTE(review): the vnode returned by untraverse()
			 * is never released in this function.  If
			 * untraverse() returns a held vnode when it differs
			 * from cs->vp, that hold is leaked -- confirm
			 * against untraverse().
			 */
			dvp = untraverse(cs->vp);

			/*
			 * Set the different_export flag so we remember
			 * to pick up a new exportinfo entry for
			 * this new filesystem.
			 */
			different_export = 1;
		} else {

			/*
			 * If dotdotting above an export point then set
			 * the different_export to get new export info.
			 */
			different_export = nfs_exported(cs->exi, cs->vp);
		}
	}

	/*
	 * Get the vnode for the component "nm".
	 * No vnode has been obtained yet, so a failure needs no release.
	 */
	error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
	    NULL, NULL, NULL);
	if (error)
		return (puterrno4(error));

	/*
	 * If the vnode is in a pseudo filesystem, or if the security flavor
	 * used in the request is valid but not an explicitly shared flavor,
	 * or the access bit indicates that this is a limited access,
	 * check whether this vnode is visible.
	 *
	 * nfs_visible() may also set different_export.
	 */
	if (!different_export &&
	    (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
	    cs->access & CS_ACCESS_LIMITED)) {
		if (! nfs_visible(cs->exi, vp, &different_export)) {
			VN_RELE(vp);
			return (puterrno4(ENOENT));
		}
	}

	/*
	 * If it's a mountpoint, then traverse it.
	 */
	if (vn_ismntpt(vp)) {
		if ((error = traverse(&vp)) != 0) {
			VN_RELE(vp);
			return (puterrno4(error));
		}
		/* remember that we had to traverse mountpoint */
		did_traverse = TRUE;
		different_export = 1;
	} else if (vp->v_vfsp != dvp->v_vfsp) {
		/*
		 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
		 * then vp is probably an LOFS object.  We don't need the
		 * realvp, we just need to know that we might have crossed
		 * a server fs boundary and need to call checkexport4.
		 * (LOFS lookup hides server fs mountpoints, and actually calls
		 * traverse)
		 */
		different_export = 1;
	}

	/*
	 * Get the export information for it.
	 */
	if (different_export) {

		bzero(&fid, sizeof (fid));
		fid.fid_len = MAXFIDSZ;
		error = vop_fid_pseudo(vp, &fid);
		if (error) {
			VN_RELE(vp);
			return (puterrno4(error));
		}

		/*
		 * For ".." the export is found from the vnode itself via
		 * nfs_vptoexi(); otherwise it is looked up by fsid and fid.
		 */
		if (dotdot)
			exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
		else
			exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);

		if (exi == NULL) {
			if (did_traverse == TRUE) {
				/*
				 * If this vnode is a mounted-on vnode,
				 * but the mounted-on file system is not
				 * exported, send back the secinfo for
				 * the exported node that the mounted-on
				 * vnode lives in.
				 */
				exi = cs->exi;
			} else {
				VN_RELE(vp);
				return (puterrno4(EACCES));
			}
		}
	} else {
		exi = cs->exi;
	}
	ASSERT(exi != NULL);


	/*
	 * Create the secinfo result based on the security information
	 * from the exportinfo structure (exi).
	 *
	 * Return all flavors for a pseudo node.
	 * For a real export node, return the flavor that the client
	 * has access with.
	 */
	ASSERT(RW_LOCK_HELD(&ne->exported_lock));
	if (PSEUDO(exi)) {
		count = exi->exi_export.ex_seccnt; /* total sec count */
		resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
		secp = exi->exi_export.ex_secinfo;

		for (i = 0; i < count; i++) {
			si = &secp[i].s_secinfo;
			resok_val[i].flavor = si->sc_rpcnum;
			if (resok_val[i].flavor == RPCSEC_GSS) {
				rpcsec_gss_info *info;

				info = &resok_val[i].flavor_info;
				info->qop = si->sc_qop;
				info->service = (rpc_gss_svc_t)si->sc_service;

				/* get oid opaque data */
				info->oid.sec_oid4_len =
				    si->sc_gss_mech_type->length;
				info->oid.sec_oid4_val = kmem_alloc(
				    si->sc_gss_mech_type->length, KM_SLEEP);
				bcopy(
				    si->sc_gss_mech_type->elements,
				    info->oid.sec_oid4_val,
				    info->oid.sec_oid4_len);
			}
		}
		resp->SECINFO4resok_len = count;
		resp->SECINFO4resok_val = resok_val;
	} else {
		/*
		 * ret_cnt counts the flavors the client may use; k indexes
		 * the next free slot of the result array.
		 */
		int ret_cnt = 0, k = 0;
		int *flavor_list;

		count = exi->exi_export.ex_seccnt; /* total sec count */
		secp = exi->exi_export.ex_secinfo;

		/* scratch list sized for the worst case: every flavor */
		flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
		/* find out which flavors to return */
		for (i = 0; i < count; i ++) {
			int access, flavor, perm;

			flavor = secp[i].s_secinfo.sc_nfsnum;
			perm = secp[i].s_flags;

			access = nfsauth4_secinfo_access(exi, cs->req,
			    flavor, perm, cs->basecr);

			/* keep flavors neither denied nor "wrong sec" */
			if (! (access & NFSAUTH_DENIED) &&
			    ! (access & NFSAUTH_WRONGSEC)) {
				flavor_list[ret_cnt] = flavor;
				ret_cnt++;
			}
		}

		/* Create the returning SECINFO value */
		resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);

		for (i = 0; i < count; i++) {
			/*
			 * If the flavor is in the flavor list,
			 * fill in resok_val.
			 */
			si = &secp[i].s_secinfo;
			if (in_flavor_list(si->sc_nfsnum,
			    flavor_list, ret_cnt)) {
				resok_val[k].flavor = si->sc_rpcnum;
				if (resok_val[k].flavor == RPCSEC_GSS) {
					rpcsec_gss_info *info;

					info = &resok_val[k].flavor_info;
					info->qop = si->sc_qop;
					info->service = (rpc_gss_svc_t)
					    si->sc_service;

					/* get oid opaque data */
					info->oid.sec_oid4_len =
					    si->sc_gss_mech_type->length;
					info->oid.sec_oid4_val = kmem_alloc(
					    si->sc_gss_mech_type->length,
					    KM_SLEEP);
					bcopy(si->sc_gss_mech_type->elements,
					    info->oid.sec_oid4_val,
					    info->oid.sec_oid4_len);
				}
				k++;
			}
			/* stop once every slot of resok_val is filled */
			if (k >= ret_cnt)
				break;
		}
		resp->SECINFO4resok_len = ret_cnt;
		resp->SECINFO4resok_val = resok_val;
		kmem_free(flavor_list, count * sizeof (int));
	}

	VN_RELE(vp);
	return (NFS4_OK);
}
1181 1181
1182 1182 /*
1183 1183 * SECINFO (Operation 33): Obtain required security information on
1184 1184 * the component name in the format of (security-mechanism-oid, qop, service)
1185 1185 * triplets.
1186 1186 */
1187 1187 /* ARGSUSED */
1188 1188 static void
1189 1189 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1190 1190 struct compound_state *cs)
1191 1191 {
1192 1192 SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1193 1193 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1194 1194 utf8string *utfnm = &args->name;
1195 1195 uint_t len;
1196 1196 char *nm;
1197 1197 struct sockaddr *ca;
1198 1198 char *name = NULL;
1199 1199 nfsstat4 status = NFS4_OK;
1200 1200
1201 1201 DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1202 1202 SECINFO4args *, args);
1203 1203
1204 1204 /*
1205 1205 * Current file handle (cfh) should have been set before getting
1206 1206 * into this function. If not, return error.
1207 1207 */
1208 1208 if (cs->vp == NULL) {
1209 1209 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1210 1210 goto out;
1211 1211 }
1212 1212
1213 1213 if (cs->vp->v_type != VDIR) {
1214 1214 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1215 1215 goto out;
1216 1216 }
1217 1217
1218 1218 /*
1219 1219 * Verify the component name. If failed, error out, but
1220 1220 * do not error out if the component name is a "..".
1221 1221 * SECINFO will return its parents secinfo data for SECINFO "..".
1222 1222 */
1223 1223 status = utf8_dir_verify(utfnm);
1224 1224 if (status != NFS4_OK) {
1225 1225 if (utfnm->utf8string_len != 2 ||
1226 1226 utfnm->utf8string_val[0] != '.' ||
1227 1227 utfnm->utf8string_val[1] != '.') {
1228 1228 *cs->statusp = resp->status = status;
1229 1229 goto out;
1230 1230 }
1231 1231 }
1232 1232
1233 1233 nm = utf8_to_str(utfnm, &len, NULL);
1234 1234 if (nm == NULL) {
1235 1235 *cs->statusp = resp->status = NFS4ERR_INVAL;
1236 1236 goto out;
1237 1237 }
1238 1238
1239 1239 if (len > MAXNAMELEN) {
1240 1240 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1241 1241 kmem_free(nm, len);
1242 1242 goto out;
1243 1243 }
1244 1244
1245 1245 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1246 1246 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1247 1247 MAXPATHLEN + 1);
1248 1248
1249 1249 if (name == NULL) {
1250 1250 *cs->statusp = resp->status = NFS4ERR_INVAL;
1251 1251 kmem_free(nm, len);
1252 1252 goto out;
1253 1253 }
1254 1254
1255 1255
1256 1256 *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1257 1257
1258 1258 if (name != nm)
1259 1259 kmem_free(name, MAXPATHLEN + 1);
1260 1260 kmem_free(nm, len);
1261 1261
1262 1262 out:
1263 1263 DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1264 1264 SECINFO4res *, resp);
1265 1265 }
1266 1266
1267 1267 /*
1268 1268 * Free SECINFO result.
1269 1269 */
1270 1270 /* ARGSUSED */
1271 1271 static void
1272 1272 rfs4_op_secinfo_free(nfs_resop4 *resop)
1273 1273 {
1274 1274 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1275 1275 int count, i;
1276 1276 secinfo4 *resok_val;
1277 1277
1278 1278 /* If this is not an Ok result, nothing to free. */
1279 1279 if (resp->status != NFS4_OK) {
1280 1280 return;
1281 1281 }
1282 1282
1283 1283 count = resp->SECINFO4resok_len;
1284 1284 resok_val = resp->SECINFO4resok_val;
1285 1285
1286 1286 for (i = 0; i < count; i++) {
1287 1287 if (resok_val[i].flavor == RPCSEC_GSS) {
1288 1288 rpcsec_gss_info *info;
1289 1289
1290 1290 info = &resok_val[i].flavor_info;
1291 1291 kmem_free(info->oid.sec_oid4_val,
1292 1292 info->oid.sec_oid4_len);
1293 1293 }
1294 1294 }
1295 1295 kmem_free(resok_val, count * sizeof (secinfo4));
1296 1296 resp->SECINFO4resok_len = 0;
1297 1297 resp->SECINFO4resok_val = NULL;
1298 1298 }
1299 1299
1300 1300 /* ARGSUSED */
1301 1301 static void
1302 1302 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1303 1303 struct compound_state *cs)
1304 1304 {
1305 1305 ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1306 1306 ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1307 1307 int error;
1308 1308 vnode_t *vp;
1309 1309 struct vattr va;
1310 1310 int checkwriteperm;
1311 1311 cred_t *cr = cs->cr;
1312 1312 bslabel_t *clabel, *slabel;
1313 1313 ts_label_t *tslabel;
1314 1314 boolean_t admin_low_client;
1315 1315
1316 1316 DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1317 1317 ACCESS4args *, args);
1318 1318
1319 1319 #if 0 /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1320 1320 if (cs->access == CS_ACCESS_DENIED) {
1321 1321 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1322 1322 goto out;
1323 1323 }
1324 1324 #endif
1325 1325 if (cs->vp == NULL) {
1326 1326 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1327 1327 goto out;
1328 1328 }
1329 1329
1330 1330 ASSERT(cr != NULL);
1331 1331
1332 1332 vp = cs->vp;
1333 1333
1334 1334 /*
1335 1335 * If the file system is exported read only, it is not appropriate
1336 1336 * to check write permissions for regular files and directories.
1337 1337 * Special files are interpreted by the client, so the underlying
1338 1338 * permissions are sent back to the client for interpretation.
1339 1339 */
1340 1340 if (rdonly4(req, cs) &&
1341 1341 (vp->v_type == VREG || vp->v_type == VDIR))
1342 1342 checkwriteperm = 0;
1343 1343 else
1344 1344 checkwriteperm = 1;
1345 1345
1346 1346 /*
1347 1347 * XXX
1348 1348 * We need the mode so that we can correctly determine access
1349 1349 * permissions relative to a mandatory lock file. Access to
1350 1350 * mandatory lock files is denied on the server, so it might
1351 1351 * as well be reflected to the server during the open.
1352 1352 */
1353 1353 va.va_mask = AT_MODE;
1354 1354 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1355 1355 if (error) {
1356 1356 *cs->statusp = resp->status = puterrno4(error);
1357 1357 goto out;
1358 1358 }
1359 1359 resp->access = 0;
1360 1360 resp->supported = 0;
1361 1361
1362 1362 if (is_system_labeled()) {
1363 1363 ASSERT(req->rq_label != NULL);
1364 1364 clabel = req->rq_label;
1365 1365 DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1366 1366 "got client label from request(1)",
1367 1367 struct svc_req *, req);
1368 1368 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1369 1369 if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1370 1370 *cs->statusp = resp->status = puterrno4(EACCES);
1371 1371 goto out;
1372 1372 }
1373 1373 slabel = label2bslabel(tslabel);
1374 1374 DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1375 1375 char *, "got server label(1) for vp(2)",
1376 1376 bslabel_t *, slabel, vnode_t *, vp);
1377 1377
1378 1378 admin_low_client = B_FALSE;
1379 1379 } else
1380 1380 admin_low_client = B_TRUE;
1381 1381 }
1382 1382
1383 1383 if (args->access & ACCESS4_READ) {
1384 1384 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1385 1385 if (!error && !MANDLOCK(vp, va.va_mode) &&
1386 1386 (!is_system_labeled() || admin_low_client ||
1387 1387 bldominates(clabel, slabel)))
1388 1388 resp->access |= ACCESS4_READ;
1389 1389 resp->supported |= ACCESS4_READ;
1390 1390 }
1391 1391 if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1392 1392 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1393 1393 if (!error && (!is_system_labeled() || admin_low_client ||
1394 1394 bldominates(clabel, slabel)))
1395 1395 resp->access |= ACCESS4_LOOKUP;
1396 1396 resp->supported |= ACCESS4_LOOKUP;
1397 1397 }
1398 1398 if (checkwriteperm &&
1399 1399 (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1400 1400 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1401 1401 if (!error && !MANDLOCK(vp, va.va_mode) &&
1402 1402 (!is_system_labeled() || admin_low_client ||
1403 1403 blequal(clabel, slabel)))
1404 1404 resp->access |=
1405 1405 (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1406 1406 resp->supported |=
1407 1407 resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1408 1408 }
1409 1409
1410 1410 if (checkwriteperm &&
1411 1411 (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1412 1412 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1413 1413 if (!error && (!is_system_labeled() || admin_low_client ||
1414 1414 blequal(clabel, slabel)))
1415 1415 resp->access |= ACCESS4_DELETE;
1416 1416 resp->supported |= ACCESS4_DELETE;
1417 1417 }
1418 1418 if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1419 1419 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1420 1420 if (!error && !MANDLOCK(vp, va.va_mode) &&
1421 1421 (!is_system_labeled() || admin_low_client ||
1422 1422 bldominates(clabel, slabel)))
1423 1423 resp->access |= ACCESS4_EXECUTE;
1424 1424 resp->supported |= ACCESS4_EXECUTE;
1425 1425 }
1426 1426
1427 1427 if (is_system_labeled() && !admin_low_client)
1428 1428 label_rele(tslabel);
1429 1429
1430 1430 *cs->statusp = resp->status = NFS4_OK;
1431 1431 out:
1432 1432 DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1433 1433 ACCESS4res *, resp);
1434 1434 }
1435 1435
1436 1436 /* ARGSUSED */
1437 1437 static void
1438 1438 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1439 1439 struct compound_state *cs)
1440 1440 {
1441 1441 COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1442 1442 COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1443 1443 int error;
1444 1444 vnode_t *vp = cs->vp;
1445 1445 cred_t *cr = cs->cr;
1446 1446 vattr_t va;
1447 1447 nfs4_srv_t *nsrv4;
1448 1448
1449 1449 DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1450 1450 COMMIT4args *, args);
1451 1451
1452 1452 if (vp == NULL) {
1453 1453 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1454 1454 goto out;
1455 1455 }
1456 1456 if (cs->access == CS_ACCESS_DENIED) {
1457 1457 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1458 1458 goto out;
1459 1459 }
1460 1460
1461 1461 if (args->offset + args->count < args->offset) {
1462 1462 *cs->statusp = resp->status = NFS4ERR_INVAL;
1463 1463 goto out;
1464 1464 }
1465 1465
1466 1466 va.va_mask = AT_UID;
1467 1467 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1468 1468
1469 1469 /*
1470 1470 * If we can't get the attributes, then we can't do the
1471 1471 * right access checking. So, we'll fail the request.
1472 1472 */
1473 1473 if (error) {
1474 1474 *cs->statusp = resp->status = puterrno4(error);
1475 1475 goto out;
1476 1476 }
1477 1477 if (rdonly4(req, cs)) {
1478 1478 *cs->statusp = resp->status = NFS4ERR_ROFS;
1479 1479 goto out;
1480 1480 }
1481 1481
1482 1482 if (vp->v_type != VREG) {
1483 1483 if (vp->v_type == VDIR)
1484 1484 resp->status = NFS4ERR_ISDIR;
1485 1485 else
1486 1486 resp->status = NFS4ERR_INVAL;
1487 1487 *cs->statusp = resp->status;
1488 1488 goto out;
1489 1489 }
1490 1490
1491 1491 if (crgetuid(cr) != va.va_uid &&
1492 1492 (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1493 1493 *cs->statusp = resp->status = puterrno4(error);
1494 1494 goto out;
1495 1495 }
1496 1496
1497 1497 error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1498 1498
1499 1499 if (error) {
1500 1500 *cs->statusp = resp->status = puterrno4(error);
1501 1501 goto out;
1502 1502 }
1503 1503
1504 1504 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
1505 1505 *cs->statusp = resp->status = NFS4_OK;
1506 1506 resp->writeverf = nsrv4->write4verf;
1507 1507 out:
1508 1508 DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1509 1509 COMMIT4res *, resp);
1510 1510 }
1511 1511
1512 1512 /*
1513 1513 * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1514 1514 * was completed. It does the nfsv4 create for special files.
1515 1515 */
1516 1516 /* ARGSUSED */
1517 1517 static vnode_t *
1518 1518 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1519 1519 struct compound_state *cs, vattr_t *vap, char *nm)
1520 1520 {
1521 1521 int error;
1522 1522 cred_t *cr = cs->cr;
1523 1523 vnode_t *dvp = cs->vp;
1524 1524 vnode_t *vp = NULL;
1525 1525 int mode;
1526 1526 enum vcexcl excl;
1527 1527
1528 1528 switch (args->type) {
1529 1529 case NF4CHR:
1530 1530 case NF4BLK:
1531 1531 if (secpolicy_sys_devices(cr) != 0) {
1532 1532 *cs->statusp = resp->status = NFS4ERR_PERM;
1533 1533 return (NULL);
1534 1534 }
1535 1535 if (args->type == NF4CHR)
1536 1536 vap->va_type = VCHR;
1537 1537 else
1538 1538 vap->va_type = VBLK;
1539 1539 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1540 1540 args->ftype4_u.devdata.specdata2);
1541 1541 vap->va_mask |= AT_RDEV;
1542 1542 break;
1543 1543 case NF4SOCK:
1544 1544 vap->va_type = VSOCK;
1545 1545 break;
1546 1546 case NF4FIFO:
1547 1547 vap->va_type = VFIFO;
1548 1548 break;
1549 1549 default:
1550 1550 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1551 1551 return (NULL);
1552 1552 }
1553 1553
1554 1554 /*
1555 1555 * Must specify the mode.
1556 1556 */
1557 1557 if (!(vap->va_mask & AT_MODE)) {
1558 1558 *cs->statusp = resp->status = NFS4ERR_INVAL;
1559 1559 return (NULL);
1560 1560 }
1561 1561
1562 1562 excl = EXCL;
1563 1563
1564 1564 mode = 0;
1565 1565
1566 1566 error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1567 1567 if (error) {
1568 1568 *cs->statusp = resp->status = puterrno4(error);
1569 1569 return (NULL);
1570 1570 }
1571 1571 return (vp);
1572 1572 }
1573 1573
1574 1574 /*
1575 1575 * nfsv4 create is used to create non-regular files. For regular files,
1576 1576 * use nfsv4 open.
1577 1577 */
1578 1578 /* ARGSUSED */
1579 1579 static void
1580 1580 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1581 1581 struct compound_state *cs)
1582 1582 {
1583 1583 CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1584 1584 CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1585 1585 int error;
1586 1586 struct vattr bva, iva, iva2, ava, *vap;
1587 1587 cred_t *cr = cs->cr;
1588 1588 vnode_t *dvp = cs->vp;
1589 1589 vnode_t *vp = NULL;
1590 1590 vnode_t *realvp;
1591 1591 char *nm, *lnm;
1592 1592 uint_t len, llen;
1593 1593 int syncval = 0;
1594 1594 struct nfs4_svgetit_arg sarg;
1595 1595 struct nfs4_ntov_table ntov;
1596 1596 struct statvfs64 sb;
1597 1597 nfsstat4 status;
1598 1598 struct sockaddr *ca;
1599 1599 char *name = NULL;
1600 1600 char *lname = NULL;
1601 1601
1602 1602 DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1603 1603 CREATE4args *, args);
1604 1604
1605 1605 resp->attrset = 0;
1606 1606
1607 1607 if (dvp == NULL) {
1608 1608 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1609 1609 goto out;
1610 1610 }
1611 1611
1612 1612 /*
1613 1613 * If there is an unshared filesystem mounted on this vnode,
1614 1614 * do not allow to create an object in this directory.
1615 1615 */
1616 1616 if (vn_ismntpt(dvp)) {
1617 1617 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1618 1618 goto out;
1619 1619 }
1620 1620
1621 1621 /* Verify that type is correct */
1622 1622 switch (args->type) {
1623 1623 case NF4LNK:
1624 1624 case NF4BLK:
1625 1625 case NF4CHR:
1626 1626 case NF4SOCK:
1627 1627 case NF4FIFO:
1628 1628 case NF4DIR:
1629 1629 break;
1630 1630 default:
1631 1631 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1632 1632 goto out;
1633 1633 };
1634 1634
1635 1635 if (cs->access == CS_ACCESS_DENIED) {
1636 1636 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1637 1637 goto out;
1638 1638 }
1639 1639 if (dvp->v_type != VDIR) {
1640 1640 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1641 1641 goto out;
1642 1642 }
1643 1643 status = utf8_dir_verify(&args->objname);
1644 1644 if (status != NFS4_OK) {
1645 1645 *cs->statusp = resp->status = status;
1646 1646 goto out;
1647 1647 }
1648 1648
1649 1649 if (rdonly4(req, cs)) {
1650 1650 *cs->statusp = resp->status = NFS4ERR_ROFS;
1651 1651 goto out;
1652 1652 }
1653 1653
1654 1654 /*
1655 1655 * Name of newly created object
1656 1656 */
1657 1657 nm = utf8_to_fn(&args->objname, &len, NULL);
1658 1658 if (nm == NULL) {
1659 1659 *cs->statusp = resp->status = NFS4ERR_INVAL;
1660 1660 goto out;
1661 1661 }
1662 1662
1663 1663 if (len > MAXNAMELEN) {
1664 1664 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1665 1665 kmem_free(nm, len);
1666 1666 goto out;
1667 1667 }
1668 1668
1669 1669 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1670 1670 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1671 1671 MAXPATHLEN + 1);
1672 1672
1673 1673 if (name == NULL) {
1674 1674 *cs->statusp = resp->status = NFS4ERR_INVAL;
1675 1675 kmem_free(nm, len);
1676 1676 goto out;
1677 1677 }
1678 1678
1679 1679 resp->attrset = 0;
1680 1680
1681 1681 sarg.sbp = &sb;
1682 1682 sarg.is_referral = B_FALSE;
1683 1683 nfs4_ntov_table_init(&ntov);
1684 1684
1685 1685 status = do_rfs4_set_attrs(&resp->attrset,
1686 1686 &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
1687 1687
1688 1688 if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1689 1689 status = NFS4ERR_INVAL;
1690 1690
1691 1691 if (status != NFS4_OK) {
1692 1692 *cs->statusp = resp->status = status;
1693 1693 if (name != nm)
1694 1694 kmem_free(name, MAXPATHLEN + 1);
1695 1695 kmem_free(nm, len);
1696 1696 nfs4_ntov_table_free(&ntov, &sarg);
1697 1697 resp->attrset = 0;
1698 1698 goto out;
1699 1699 }
1700 1700
1701 1701 /* Get "before" change value */
1702 1702 bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
1703 1703 error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
1704 1704 if (error) {
1705 1705 *cs->statusp = resp->status = puterrno4(error);
1706 1706 if (name != nm)
1707 1707 kmem_free(name, MAXPATHLEN + 1);
1708 1708 kmem_free(nm, len);
1709 1709 nfs4_ntov_table_free(&ntov, &sarg);
1710 1710 resp->attrset = 0;
1711 1711 goto out;
1712 1712 }
1713 1713 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1714 1714
1715 1715 vap = sarg.vap;
1716 1716
1717 1717 /*
1718 1718 * Set the default initial values for attributes when the parent
1719 1719 * directory does not have the VSUID/VSGID bit set and they have
1720 1720 * not been specified in createattrs.
1721 1721 */
1722 1722 if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
1723 1723 vap->va_uid = crgetuid(cr);
1724 1724 vap->va_mask |= AT_UID;
1725 1725 }
1726 1726 if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
1727 1727 vap->va_gid = crgetgid(cr);
1728 1728 vap->va_mask |= AT_GID;
1729 1729 }
1730 1730
1731 1731 vap->va_mask |= AT_TYPE;
1732 1732 switch (args->type) {
1733 1733 case NF4DIR:
1734 1734 vap->va_type = VDIR;
1735 1735 if ((vap->va_mask & AT_MODE) == 0) {
1736 1736 vap->va_mode = 0700; /* default: owner rwx only */
1737 1737 vap->va_mask |= AT_MODE;
1738 1738 }
1739 1739 error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1740 1740 if (error)
1741 1741 break;
1742 1742
1743 1743 /*
1744 1744 * Get the initial "after" sequence number, if it fails,
1745 1745 * set to zero
1746 1746 */
1747 1747 iva.va_mask = AT_SEQ;
1748 1748 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1749 1749 iva.va_seq = 0;
1750 1750 break;
1751 1751 case NF4LNK:
1752 1752 vap->va_type = VLNK;
1753 1753 if ((vap->va_mask & AT_MODE) == 0) {
1754 1754 vap->va_mode = 0700; /* default: owner rwx only */
1755 1755 vap->va_mask |= AT_MODE;
1756 1756 }
1757 1757
1758 1758 /*
1759 1759 * symlink names must be treated as data
1760 1760 */
1761 1761 lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
1762 1762 &llen, NULL);
1763 1763
1764 1764 if (lnm == NULL) {
1765 1765 *cs->statusp = resp->status = NFS4ERR_INVAL;
1766 1766 if (name != nm)
1767 1767 kmem_free(name, MAXPATHLEN + 1);
1768 1768 kmem_free(nm, len);
1769 1769 nfs4_ntov_table_free(&ntov, &sarg);
1770 1770 resp->attrset = 0;
1771 1771 goto out;
1772 1772 }
1773 1773
1774 1774 if (llen > MAXPATHLEN) {
1775 1775 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1776 1776 if (name != nm)
1777 1777 kmem_free(name, MAXPATHLEN + 1);
1778 1778 kmem_free(nm, len);
1779 1779 kmem_free(lnm, llen);
1780 1780 nfs4_ntov_table_free(&ntov, &sarg);
1781 1781 resp->attrset = 0;
1782 1782 goto out;
1783 1783 }
1784 1784
1785 1785 lname = nfscmd_convname(ca, cs->exi, lnm,
1786 1786 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1787 1787
1788 1788 if (lname == NULL) {
1789 1789 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
1790 1790 if (name != nm)
1791 1791 kmem_free(name, MAXPATHLEN + 1);
1792 1792 kmem_free(nm, len);
1793 1793 kmem_free(lnm, llen);
1794 1794 nfs4_ntov_table_free(&ntov, &sarg);
1795 1795 resp->attrset = 0;
1796 1796 goto out;
1797 1797 }
1798 1798
1799 1799 error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
1800 1800 if (lname != lnm)
1801 1801 kmem_free(lname, MAXPATHLEN + 1);
1802 1802 kmem_free(lnm, llen);
1803 1803 if (error)
1804 1804 break;
1805 1805
1806 1806 /*
1807 1807 * Get the initial "after" sequence number, if it fails,
1808 1808 * set to zero
1809 1809 */
1810 1810 iva.va_mask = AT_SEQ;
1811 1811 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1812 1812 iva.va_seq = 0;
1813 1813
1814 1814 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
1815 1815 NULL, NULL, NULL);
1816 1816 if (error)
1817 1817 break;
1818 1818
1819 1819 /*
1820 1820 * va_seq is not safe over VOP calls, check it again
1821 1821 * if it has changed zero out iva to force atomic = FALSE.
1822 1822 */
1823 1823 iva2.va_mask = AT_SEQ;
1824 1824 if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
1825 1825 iva2.va_seq != iva.va_seq)
1826 1826 iva.va_seq = 0;
1827 1827 break;
1828 1828 default:
1829 1829 /*
1830 1830 * probably a special file.
1831 1831 */
1832 1832 if ((vap->va_mask & AT_MODE) == 0) {
1833 1833 vap->va_mode = 0600; /* default: owner rw only */
1834 1834 vap->va_mask |= AT_MODE;
1835 1835 }
1836 1836 syncval = FNODSYNC;
1837 1837 /*
1838 1838 * We know this will only generate one VOP call
1839 1839 */
1840 1840 vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
1841 1841
1842 1842 if (vp == NULL) {
1843 1843 if (name != nm)
1844 1844 kmem_free(name, MAXPATHLEN + 1);
1845 1845 kmem_free(nm, len);
1846 1846 nfs4_ntov_table_free(&ntov, &sarg);
1847 1847 resp->attrset = 0;
1848 1848 goto out;
1849 1849 }
1850 1850
1851 1851 /*
1852 1852 * Get the initial "after" sequence number, if it fails,
1853 1853 * set to zero
1854 1854 */
1855 1855 iva.va_mask = AT_SEQ;
1856 1856 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1857 1857 iva.va_seq = 0;
1858 1858
1859 1859 break;
1860 1860 }
1861 1861 if (name != nm)
1862 1862 kmem_free(name, MAXPATHLEN + 1);
1863 1863 kmem_free(nm, len);
1864 1864
1865 1865 if (error) {
1866 1866 *cs->statusp = resp->status = puterrno4(error);
1867 1867 }
1868 1868
1869 1869 /*
1870 1870 * Force modified data and metadata out to stable storage.
1871 1871 */
1872 1872 (void) VOP_FSYNC(dvp, 0, cr, NULL);
1873 1873
1874 1874 if (resp->status != NFS4_OK) {
1875 1875 if (vp != NULL)
1876 1876 VN_RELE(vp);
1877 1877 nfs4_ntov_table_free(&ntov, &sarg);
1878 1878 resp->attrset = 0;
1879 1879 goto out;
1880 1880 }
1881 1881
1882 1882 /*
1883 1883 * Finish setup of cinfo response, "before" value already set.
1884 1884 * Get "after" change value, if it fails, simply return the
1885 1885 * before value.
1886 1886 */
1887 1887 ava.va_mask = AT_CTIME|AT_SEQ;
1888 1888 if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
1889 1889 ava.va_ctime = bva.va_ctime;
1890 1890 ava.va_seq = 0;
1891 1891 }
1892 1892 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
1893 1893
1894 1894 /*
1895 1895 * True verification that object was created with correct
1896 1896 * attrs is impossible. The attrs could have been changed
1897 1897 * immediately after object creation. If attributes did
1898 1898 * not verify, the only recourse for the server is to
1899 1899 * destroy the object. Maybe if some attrs (like gid)
1900 1900 * are set incorrectly, the object should be destroyed;
1901 1901 * however, seems bad as a default policy. Do we really
1902 1902 * want to destroy an object over one of the times not
1903 1903 * verifying correctly? For these reasons, the server
1904 1904 * currently sets bits in attrset for createattrs
1905 1905 * that were set; however, no verification is done.
1906 1906 *
1907 1907 * vmask_to_nmask accounts for vattr bits set on create
1908 1908 * [do_rfs4_set_attrs() only sets resp bits for
1909 1909 * non-vattr/vfs bits.]
1910 1910 * Mask off any bits set by default so as not to return
1911 1911 * more attrset bits than were requested in createattrs
1912 1912 */
1913 1913 nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
1914 1914 resp->attrset &= args->createattrs.attrmask;
1915 1915 nfs4_ntov_table_free(&ntov, &sarg);
1916 1916
1917 1917 error = makefh4(&cs->fh, vp, cs->exi);
1918 1918 if (error) {
1919 1919 *cs->statusp = resp->status = puterrno4(error);
1920 1920 }
1921 1921
1922 1922 /*
1923 1923 * The cinfo.atomic = TRUE only if we got no errors, we have
1924 1924 * non-zero va_seq's, and it has incremented by exactly one
1925 1925 * during the creation and it didn't change during the VOP_LOOKUP
1926 1926 * or VOP_FSYNC.
1927 1927 */
1928 1928 if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
1929 1929 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
1930 1930 resp->cinfo.atomic = TRUE;
1931 1931 else
1932 1932 resp->cinfo.atomic = FALSE;
1933 1933
1934 1934 /*
1935 1935 * Force modified metadata out to stable storage.
1936 1936 *
1937 1937 * if a underlying vp exists, pass it to VOP_FSYNC
1938 1938 */
1939 1939 if (VOP_REALVP(vp, &realvp, NULL) == 0)
1940 1940 (void) VOP_FSYNC(realvp, syncval, cr, NULL);
1941 1941 else
1942 1942 (void) VOP_FSYNC(vp, syncval, cr, NULL);
1943 1943
1944 1944 if (resp->status != NFS4_OK) {
1945 1945 VN_RELE(vp);
1946 1946 goto out;
1947 1947 }
1948 1948 if (cs->vp)
1949 1949 VN_RELE(cs->vp);
1950 1950
1951 1951 cs->vp = vp;
1952 1952 *cs->statusp = resp->status = NFS4_OK;
1953 1953 out:
1954 1954 DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
1955 1955 CREATE4res *, resp);
1956 1956 }
1957 1957
1958 1958 /*ARGSUSED*/
1959 1959 static void
1960 1960 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1961 1961 struct compound_state *cs)
1962 1962 {
1963 1963 DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1964 1964 DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1965 1965
1966 1966 rfs4_op_inval(argop, resop, req, cs);
1967 1967
1968 1968 DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1969 1969 DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1970 1970 }
1971 1971
1972 1972 /*ARGSUSED*/
1973 1973 static void
1974 1974 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1975 1975 struct compound_state *cs)
1976 1976 {
1977 1977 DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1978 1978 DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1979 1979 rfs4_deleg_state_t *dsp;
1980 1980 nfsstat4 status;
1981 1981
1982 1982 DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
1983 1983 DELEGRETURN4args *, args);
1984 1984
1985 1985 status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
1986 1986 resp->status = *cs->statusp = status;
1987 1987 if (status != NFS4_OK)
1988 1988 goto out;
1989 1989
1990 1990 /* Ensure specified filehandle matches */
1991 1991 if (cs->vp != dsp->rds_finfo->rf_vp) {
1992 1992 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
1993 1993 } else
1994 1994 rfs4_return_deleg(dsp, FALSE);
1995 1995
1996 1996 rfs4_update_lease(dsp->rds_client);
1997 1997
1998 1998 rfs4_deleg_state_rele(dsp);
1999 1999 out:
2000 2000 DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
2001 2001 DELEGRETURN4res *, resp);
2002 2002 }
2003 2003
2004 2004 /*
2005 2005 * Check to see if a given "flavor" is an explicitly shared flavor.
2006 2006 * The assumption of this routine is the "flavor" is already a valid
2007 2007 * flavor in the secinfo list of "exi".
2008 2008 *
2009 2009 * e.g.
2010 2010 * # share -o sec=flavor1 /export
2011 2011 * # share -o sec=flavor2 /export/home
2012 2012 *
2013 2013 * flavor2 is not an explicitly shared flavor for /export,
2014 2014 * however it is in the secinfo list for /export thru the
2015 2015 * server namespace setup.
2016 2016 */
2017 2017 int
2018 2018 is_exported_sec(int flavor, struct exportinfo *exi)
2019 2019 {
2020 2020 int i;
2021 2021 struct secinfo *sp;
2022 2022
2023 2023 sp = exi->exi_export.ex_secinfo;
2024 2024 for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2025 2025 if (flavor == sp[i].s_secinfo.sc_nfsnum ||
2026 2026 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
2027 2027 return (SEC_REF_EXPORTED(&sp[i]));
2028 2028 }
2029 2029 }
2030 2030
2031 2031 /* Should not reach this point based on the assumption */
2032 2032 return (0);
2033 2033 }
2034 2034
2035 2035 /*
2036 2036 * Check if the security flavor used in the request matches what is
2037 2037 * required at the export point or at the root pseudo node (exi_root).
2038 2038 *
2039 2039 * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
2040 2040 *
2041 2041 */
2042 2042 static int
2043 2043 secinfo_match_or_authnone(struct compound_state *cs)
2044 2044 {
2045 2045 int i;
2046 2046 struct secinfo *sp;
2047 2047
2048 2048 /*
2049 2049 * Check cs->nfsflavor (from the request) against
2050 2050 * the current export data in cs->exi.
2051 2051 */
2052 2052 sp = cs->exi->exi_export.ex_secinfo;
2053 2053 for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
2054 2054 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
2055 2055 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
2056 2056 return (1);
2057 2057 }
2058 2058
2059 2059 return (0);
2060 2060 }
2061 2061
2062 2062 /*
2063 2063 * Check the access authority for the client and return the correct error.
2064 2064 */
2065 2065 nfsstat4
2066 2066 call_checkauth4(struct compound_state *cs, struct svc_req *req)
2067 2067 {
2068 2068 int authres;
2069 2069
2070 2070 /*
2071 2071 * First, check if the security flavor used in the request
2072 2072 * are among the flavors set in the server namespace.
2073 2073 */
2074 2074 if (!secinfo_match_or_authnone(cs)) {
2075 2075 *cs->statusp = NFS4ERR_WRONGSEC;
2076 2076 return (*cs->statusp);
2077 2077 }
2078 2078
2079 2079 authres = checkauth4(cs, req);
2080 2080
2081 2081 if (authres > 0) {
2082 2082 *cs->statusp = NFS4_OK;
2083 2083 if (! (cs->access & CS_ACCESS_LIMITED))
2084 2084 cs->access = CS_ACCESS_OK;
2085 2085 } else if (authres == 0) {
2086 2086 *cs->statusp = NFS4ERR_ACCESS;
2087 2087 } else if (authres == -2) {
2088 2088 *cs->statusp = NFS4ERR_WRONGSEC;
2089 2089 } else {
2090 2090 *cs->statusp = NFS4ERR_DELAY;
2091 2091 }
2092 2092 return (*cs->statusp);
2093 2093 }
2094 2094
2095 2095 /*
2096 2096 * bitmap4_to_attrmask is called by getattr and readdir.
2097 2097 * It sets up the vattr mask and determines whether vfsstat call is needed
2098 2098 * based on the input bitmap.
2099 2099 * Returns nfsv4 status.
2100 2100 */
2101 2101 static nfsstat4
2102 2102 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2103 2103 {
2104 2104 int i;
2105 2105 uint_t va_mask;
2106 2106 struct statvfs64 *sbp = sargp->sbp;
2107 2107
2108 2108 sargp->sbp = NULL;
2109 2109 sargp->flag = 0;
2110 2110 sargp->rdattr_error = NFS4_OK;
2111 2111 sargp->mntdfid_set = FALSE;
2112 2112 if (sargp->cs->vp)
2113 2113 sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2114 2114 FH4_ATTRDIR | FH4_NAMEDATTR);
2115 2115 else
2116 2116 sargp->xattr = 0;
2117 2117
2118 2118 /*
2119 2119 * Set rdattr_error_req to true if return error per
2120 2120 * failed entry rather than fail the readdir.
2121 2121 */
2122 2122 if (breq & FATTR4_RDATTR_ERROR_MASK)
2123 2123 sargp->rdattr_error_req = 1;
2124 2124 else
2125 2125 sargp->rdattr_error_req = 0;
2126 2126
2127 2127 /*
2128 2128 * generate the va_mask
2129 2129 * Handle the easy cases first
2130 2130 */
2131 2131 switch (breq) {
2132 2132 case NFS4_NTOV_ATTR_MASK:
2133 2133 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2134 2134 return (NFS4_OK);
2135 2135
2136 2136 case NFS4_FS_ATTR_MASK:
2137 2137 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2138 2138 sargp->sbp = sbp;
2139 2139 return (NFS4_OK);
2140 2140
2141 2141 case NFS4_NTOV_ATTR_CACHE_MASK:
2142 2142 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2143 2143 return (NFS4_OK);
2144 2144
2145 2145 case FATTR4_LEASE_TIME_MASK:
2146 2146 sargp->vap->va_mask = 0;
2147 2147 return (NFS4_OK);
2148 2148
2149 2149 default:
2150 2150 va_mask = 0;
2151 2151 for (i = 0; i < nfs4_ntov_map_size; i++) {
2152 2152 if ((breq & nfs4_ntov_map[i].fbit) &&
2153 2153 nfs4_ntov_map[i].vbit)
2154 2154 va_mask |= nfs4_ntov_map[i].vbit;
2155 2155 }
2156 2156
2157 2157 /*
2158 2158 * Check is vfsstat is needed
2159 2159 */
2160 2160 if (breq & NFS4_FS_ATTR_MASK)
2161 2161 sargp->sbp = sbp;
2162 2162
2163 2163 sargp->vap->va_mask = va_mask;
2164 2164 return (NFS4_OK);
2165 2165 }
2166 2166 /* NOTREACHED */
2167 2167 }
2168 2168
2169 2169 /*
2170 2170 * bitmap4_get_sysattrs is called by getattr and readdir.
2171 2171 * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2172 2172 * Returns nfsv4 status.
2173 2173 */
2174 2174 static nfsstat4
2175 2175 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2176 2176 {
2177 2177 int error;
2178 2178 struct compound_state *cs = sargp->cs;
2179 2179 vnode_t *vp = cs->vp;
2180 2180
2181 2181 if (sargp->sbp != NULL) {
2182 2182 if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2183 2183 sargp->sbp = NULL; /* to identify error */
2184 2184 return (puterrno4(error));
2185 2185 }
2186 2186 }
2187 2187
2188 2188 return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2189 2189 }
2190 2190
2191 2191 static void
2192 2192 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2193 2193 {
2194 2194 ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2195 2195 KM_SLEEP);
2196 2196 ntovp->attrcnt = 0;
2197 2197 ntovp->vfsstat = FALSE;
2198 2198 }
2199 2199
2200 2200 static void
2201 2201 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2202 2202 struct nfs4_svgetit_arg *sargp)
2203 2203 {
2204 2204 int i;
2205 2205 union nfs4_attr_u *na;
2206 2206 uint8_t *amap;
2207 2207
2208 2208 /*
2209 2209 * XXX Should do the same checks for whether the bit is set
2210 2210 */
2211 2211 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2212 2212 i < ntovp->attrcnt; i++, na++, amap++) {
2213 2213 (void) (*nfs4_ntov_map[*amap].sv_getit)(
2214 2214 NFS4ATTR_FREEIT, sargp, na);
2215 2215 }
2216 2216 if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2217 2217 /*
2218 2218 * xdr_free for getattr will be done later
2219 2219 */
2220 2220 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2221 2221 i < ntovp->attrcnt; i++, na++, amap++) {
2222 2222 xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2223 2223 }
2224 2224 }
2225 2225 kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2226 2226 }
2227 2227
2228 2228 /*
2229 2229 * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2230 2230 */
2231 2231 static nfsstat4
2232 2232 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2233 2233 struct nfs4_svgetit_arg *sargp)
2234 2234 {
2235 2235 int error = 0;
2236 2236 int i, k;
2237 2237 struct nfs4_ntov_table ntov;
2238 2238 XDR xdr;
2239 2239 ulong_t xdr_size;
2240 2240 char *xdr_attrs;
2241 2241 nfsstat4 status = NFS4_OK;
2242 2242 nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2243 2243 union nfs4_attr_u *na;
2244 2244 uint8_t *amap;
2245 2245
2246 2246 sargp->op = NFS4ATTR_GETIT;
2247 2247 sargp->flag = 0;
2248 2248
2249 2249 fattrp->attrmask = 0;
2250 2250 /* if no bits requested, then return empty fattr4 */
2251 2251 if (breq == 0) {
2252 2252 fattrp->attrlist4_len = 0;
2253 2253 fattrp->attrlist4 = NULL;
2254 2254 return (NFS4_OK);
2255 2255 }
2256 2256
2257 2257 /*
2258 2258 * return NFS4ERR_INVAL when client requests write-only attrs
2259 2259 */
2260 2260 if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2261 2261 return (NFS4ERR_INVAL);
2262 2262
2263 2263 nfs4_ntov_table_init(&ntov);
2264 2264 na = ntov.na;
2265 2265 amap = ntov.amap;
2266 2266
2267 2267 /*
2268 2268 * Now loop to get or verify the attrs
2269 2269 */
2270 2270 for (i = 0; i < nfs4_ntov_map_size; i++) {
2271 2271 if (breq & nfs4_ntov_map[i].fbit) {
2272 2272 if ((*nfs4_ntov_map[i].sv_getit)(
2273 2273 NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2274 2274
2275 2275 error = (*nfs4_ntov_map[i].sv_getit)(
2276 2276 NFS4ATTR_GETIT, sargp, na);
2277 2277
2278 2278 /*
2279 2279 * Possible error values:
2280 2280 * >0 if sv_getit failed to
2281 2281 * get the attr; 0 if succeeded;
2282 2282 * <0 if rdattr_error and the
2283 2283 * attribute cannot be returned.
2284 2284 */
2285 2285 if (error && !(sargp->rdattr_error_req))
2286 2286 goto done;
2287 2287 /*
2288 2288 * If error then just for entry
2289 2289 */
2290 2290 if (error == 0) {
2291 2291 fattrp->attrmask |=
2292 2292 nfs4_ntov_map[i].fbit;
2293 2293 *amap++ =
2294 2294 (uint8_t)nfs4_ntov_map[i].nval;
2295 2295 na++;
2296 2296 (ntov.attrcnt)++;
2297 2297 } else if ((error > 0) &&
2298 2298 (sargp->rdattr_error == NFS4_OK)) {
2299 2299 sargp->rdattr_error = puterrno4(error);
2300 2300 }
2301 2301 error = 0;
2302 2302 }
2303 2303 }
2304 2304 }
2305 2305
2306 2306 /*
2307 2307 * If rdattr_error was set after the return value for it was assigned,
2308 2308 * update it.
2309 2309 */
2310 2310 if (prev_rdattr_error != sargp->rdattr_error) {
2311 2311 na = ntov.na;
2312 2312 amap = ntov.amap;
2313 2313 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2314 2314 k = *amap;
2315 2315 if (k < FATTR4_RDATTR_ERROR) {
2316 2316 continue;
2317 2317 }
2318 2318 if ((k == FATTR4_RDATTR_ERROR) &&
2319 2319 ((*nfs4_ntov_map[k].sv_getit)(
2320 2320 NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2321 2321
2322 2322 (void) (*nfs4_ntov_map[k].sv_getit)(
2323 2323 NFS4ATTR_GETIT, sargp, na);
2324 2324 }
2325 2325 break;
2326 2326 }
2327 2327 }
2328 2328
2329 2329 xdr_size = 0;
2330 2330 na = ntov.na;
2331 2331 amap = ntov.amap;
2332 2332 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2333 2333 xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2334 2334 }
2335 2335
2336 2336 fattrp->attrlist4_len = xdr_size;
2337 2337 if (xdr_size) {
2338 2338 /* freed by rfs4_op_getattr_free() */
2339 2339 fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2340 2340
2341 2341 xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2342 2342
2343 2343 na = ntov.na;
2344 2344 amap = ntov.amap;
2345 2345 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2346 2346 if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2347 2347 DTRACE_PROBE1(nfss__e__getattr4_encfail,
2348 2348 int, *amap);
2349 2349 status = NFS4ERR_SERVERFAULT;
2350 2350 break;
2351 2351 }
2352 2352 }
2353 2353 /* xdrmem_destroy(&xdrs); */ /* NO-OP */
2354 2354 } else {
2355 2355 fattrp->attrlist4 = NULL;
2356 2356 }
2357 2357 done:
2358 2358
2359 2359 nfs4_ntov_table_free(&ntov, sargp);
2360 2360
2361 2361 if (error != 0)
2362 2362 status = puterrno4(error);
2363 2363
2364 2364 return (status);
2365 2365 }
2366 2366
2367 2367 /* ARGSUSED */
2368 2368 static void
2369 2369 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2370 2370 struct compound_state *cs)
2371 2371 {
2372 2372 GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2373 2373 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2374 2374 struct nfs4_svgetit_arg sarg;
2375 2375 struct statvfs64 sb;
2376 2376 nfsstat4 status;
2377 2377
2378 2378 DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2379 2379 GETATTR4args *, args);
2380 2380
2381 2381 if (cs->vp == NULL) {
2382 2382 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2383 2383 goto out;
2384 2384 }
2385 2385
2386 2386 if (cs->access == CS_ACCESS_DENIED) {
2387 2387 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2388 2388 goto out;
2389 2389 }
2390 2390
2391 2391 sarg.sbp = &sb;
2392 2392 sarg.cs = cs;
2393 2393 sarg.is_referral = B_FALSE;
2394 2394
2395 2395 status = bitmap4_to_attrmask(args->attr_request, &sarg);
2396 2396 if (status == NFS4_OK) {
2397 2397
2398 2398 status = bitmap4_get_sysattrs(&sarg);
2399 2399 if (status == NFS4_OK) {
2400 2400
2401 2401 /* Is this a referral? */
2402 2402 if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2403 2403 /* Older V4 Solaris client sees a link */
2404 2404 if (client_is_downrev(req))
2405 2405 sarg.vap->va_type = VLNK;
2406 2406 else
2407 2407 sarg.is_referral = B_TRUE;
2408 2408 }
2409 2409
2410 2410 status = do_rfs4_op_getattr(args->attr_request,
2411 2411 &resp->obj_attributes, &sarg);
2412 2412 }
2413 2413 }
2414 2414 *cs->statusp = resp->status = status;
2415 2415 out:
2416 2416 DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2417 2417 GETATTR4res *, resp);
2418 2418 }
2419 2419
2420 2420 static void
2421 2421 rfs4_op_getattr_free(nfs_resop4 *resop)
2422 2422 {
2423 2423 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2424 2424
2425 2425 nfs4_fattr4_free(&resp->obj_attributes);
2426 2426 }
2427 2427
2428 2428 /* ARGSUSED */
2429 2429 static void
2430 2430 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2431 2431 struct compound_state *cs)
2432 2432 {
2433 2433 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2434 2434
2435 2435 DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2436 2436
2437 2437 if (cs->vp == NULL) {
2438 2438 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2439 2439 goto out;
2440 2440 }
2441 2441 if (cs->access == CS_ACCESS_DENIED) {
2442 2442 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2443 2443 goto out;
2444 2444 }
2445 2445
2446 2446 /* check for reparse point at the share point */
2447 2447 if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2448 2448 /* it's all bad */
2449 2449 cs->exi->exi_moved = 1;
2450 2450 *cs->statusp = resp->status = NFS4ERR_MOVED;
2451 2451 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2452 2452 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2453 2453 return;
2454 2454 }
2455 2455
2456 2456 /* check for reparse point at vp */
2457 2457 if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2458 2458 /* it's not all bad */
2459 2459 *cs->statusp = resp->status = NFS4ERR_MOVED;
2460 2460 DTRACE_PROBE2(nfs4serv__func__referral__moved,
2461 2461 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2462 2462 return;
2463 2463 }
2464 2464
2465 2465 resp->object.nfs_fh4_val =
2466 2466 kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2467 2467 nfs_fh4_copy(&cs->fh, &resp->object);
2468 2468 *cs->statusp = resp->status = NFS4_OK;
2469 2469 out:
2470 2470 DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2471 2471 GETFH4res *, resp);
2472 2472 }
2473 2473
2474 2474 static void
2475 2475 rfs4_op_getfh_free(nfs_resop4 *resop)
2476 2476 {
2477 2477 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2478 2478
2479 2479 if (resp->status == NFS4_OK &&
2480 2480 resp->object.nfs_fh4_val != NULL) {
2481 2481 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2482 2482 resp->object.nfs_fh4_val = NULL;
2483 2483 resp->object.nfs_fh4_len = 0;
2484 2484 }
2485 2485 }
2486 2486
2487 2487 /*
2488 2488 * illegal: args: void
2489 2489 * res : status (NFS4ERR_OP_ILLEGAL)
2490 2490 */
2491 2491 /* ARGSUSED */
2492 2492 static void
2493 2493 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2494 2494 struct svc_req *req, struct compound_state *cs)
2495 2495 {
2496 2496 ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2497 2497
2498 2498 resop->resop = OP_ILLEGAL;
2499 2499 *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2500 2500 }
2501 2501
2502 2502 /*
2503 2503 * link: args: SAVED_FH: file, CURRENT_FH: target directory
2504 2504 * res: status. If success - CURRENT_FH unchanged, return change_info
2505 2505 */
2506 2506 /* ARGSUSED */
2507 2507 static void
2508 2508 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2509 2509 struct compound_state *cs)
2510 2510 {
2511 2511 LINK4args *args = &argop->nfs_argop4_u.oplink;
2512 2512 LINK4res *resp = &resop->nfs_resop4_u.oplink;
2513 2513 int error;
2514 2514 vnode_t *vp;
2515 2515 vnode_t *dvp;
2516 2516 struct vattr bdva, idva, adva;
2517 2517 char *nm;
2518 2518 uint_t len;
2519 2519 struct sockaddr *ca;
2520 2520 char *name = NULL;
2521 2521 nfsstat4 status;
2522 2522
2523 2523 DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2524 2524 LINK4args *, args);
2525 2525
2526 2526 /* SAVED_FH: source object */
2527 2527 vp = cs->saved_vp;
2528 2528 if (vp == NULL) {
2529 2529 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2530 2530 goto out;
2531 2531 }
2532 2532
2533 2533 /* CURRENT_FH: target directory */
2534 2534 dvp = cs->vp;
2535 2535 if (dvp == NULL) {
2536 2536 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2537 2537 goto out;
2538 2538 }
2539 2539
2540 2540 /*
2541 2541 * If there is a non-shared filesystem mounted on this vnode,
2542 2542 * do not allow to link any file in this directory.
2543 2543 */
2544 2544 if (vn_ismntpt(dvp)) {
2545 2545 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2546 2546 goto out;
2547 2547 }
2548 2548
2549 2549 if (cs->access == CS_ACCESS_DENIED) {
2550 2550 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2551 2551 goto out;
2552 2552 }
2553 2553
2554 2554 /* Check source object's type validity */
2555 2555 if (vp->v_type == VDIR) {
2556 2556 *cs->statusp = resp->status = NFS4ERR_ISDIR;
2557 2557 goto out;
2558 2558 }
2559 2559
2560 2560 /* Check target directory's type */
2561 2561 if (dvp->v_type != VDIR) {
2562 2562 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2563 2563 goto out;
2564 2564 }
2565 2565
2566 2566 if (cs->saved_exi != cs->exi) {
2567 2567 *cs->statusp = resp->status = NFS4ERR_XDEV;
2568 2568 goto out;
2569 2569 }
2570 2570
2571 2571 status = utf8_dir_verify(&args->newname);
2572 2572 if (status != NFS4_OK) {
2573 2573 *cs->statusp = resp->status = status;
2574 2574 goto out;
2575 2575 }
2576 2576
2577 2577 nm = utf8_to_fn(&args->newname, &len, NULL);
2578 2578 if (nm == NULL) {
2579 2579 *cs->statusp = resp->status = NFS4ERR_INVAL;
2580 2580 goto out;
2581 2581 }
2582 2582
2583 2583 if (len > MAXNAMELEN) {
2584 2584 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2585 2585 kmem_free(nm, len);
2586 2586 goto out;
2587 2587 }
2588 2588
2589 2589 if (rdonly4(req, cs)) {
2590 2590 *cs->statusp = resp->status = NFS4ERR_ROFS;
2591 2591 kmem_free(nm, len);
2592 2592 goto out;
2593 2593 }
2594 2594
2595 2595 /* Get "before" change value */
2596 2596 bdva.va_mask = AT_CTIME|AT_SEQ;
2597 2597 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2598 2598 if (error) {
2599 2599 *cs->statusp = resp->status = puterrno4(error);
2600 2600 kmem_free(nm, len);
2601 2601 goto out;
2602 2602 }
2603 2603
2604 2604 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2605 2605 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2606 2606 MAXPATHLEN + 1);
2607 2607
2608 2608 if (name == NULL) {
2609 2609 *cs->statusp = resp->status = NFS4ERR_INVAL;
2610 2610 kmem_free(nm, len);
2611 2611 goto out;
2612 2612 }
2613 2613
2614 2614 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2615 2615
2616 2616 error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2617 2617
2618 2618 if (nm != name)
2619 2619 kmem_free(name, MAXPATHLEN + 1);
2620 2620 kmem_free(nm, len);
2621 2621
2622 2622 /*
2623 2623 * Get the initial "after" sequence number, if it fails, set to zero
2624 2624 */
2625 2625 idva.va_mask = AT_SEQ;
2626 2626 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2627 2627 idva.va_seq = 0;
2628 2628
2629 2629 /*
2630 2630 * Force modified data and metadata out to stable storage.
2631 2631 */
2632 2632 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2633 2633 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2634 2634
2635 2635 if (error) {
2636 2636 *cs->statusp = resp->status = puterrno4(error);
2637 2637 goto out;
2638 2638 }
2639 2639
2640 2640 /*
2641 2641 * Get "after" change value, if it fails, simply return the
2642 2642 * before value.
2643 2643 */
2644 2644 adva.va_mask = AT_CTIME|AT_SEQ;
2645 2645 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2646 2646 adva.va_ctime = bdva.va_ctime;
2647 2647 adva.va_seq = 0;
2648 2648 }
2649 2649
2650 2650 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2651 2651
2652 2652 /*
2653 2653 * The cinfo.atomic = TRUE only if we have
2654 2654 * non-zero va_seq's, and it has incremented by exactly one
2655 2655 * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2656 2656 */
2657 2657 if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2658 2658 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2659 2659 resp->cinfo.atomic = TRUE;
2660 2660 else
2661 2661 resp->cinfo.atomic = FALSE;
2662 2662
2663 2663 *cs->statusp = resp->status = NFS4_OK;
2664 2664 out:
2665 2665 DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2666 2666 LINK4res *, resp);
2667 2667 }
2668 2668
2669 2669 /*
2670 2670 * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2671 2671 */
2672 2672
2673 2673 /* ARGSUSED */
2674 2674 static nfsstat4
2675 2675 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2676 2676 {
2677 2677 int error;
2678 2678 int different_export = 0;
2679 2679 vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2680 2680 struct exportinfo *exi = NULL, *pre_exi = NULL;
2681 2681 nfsstat4 stat;
2682 2682 fid_t fid;
2683 2683 int attrdir, dotdot, walk;
2684 2684 bool_t is_newvp = FALSE;
2685 2685
2686 2686 if (cs->vp->v_flag & V_XATTRDIR) {
2687 2687 attrdir = 1;
2688 2688 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2689 2689 } else {
2690 2690 attrdir = 0;
2691 2691 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2692 2692 }
2693 2693
2694 2694 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2695 2695
2696 2696 /*
2697 2697 * If dotdotting, then need to check whether it's
|
↓ open down ↓ |
1726 lines elided |
↑ open up ↑ |
2698 2698 * above the root of a filesystem, or above an
2699 2699 * export point.
2700 2700 */
2701 2701 if (dotdot) {
2702 2702
2703 2703 /*
2704 2704 * If dotdotting at the root of a filesystem, then
2705 2705 * need to traverse back to the mounted-on filesystem
2706 2706 * and do the dotdot lookup there.
2707 2707 */
2708 - if (cs->vp->v_flag & VROOT) {
2708 + if ((cs->vp->v_flag & VROOT) || VN_IS_CURZONEROOT(cs->vp)) {
2709 2709
2710 2710 /*
2711 2711 * If at the system root, then can
2712 2712 * go up no further.
2713 2713 */
2714 2714 if (VN_CMP(cs->vp, ZONE_ROOTVP()))
2715 2715 return (puterrno4(ENOENT));
2716 2716
2717 2717 /*
2718 2718 * Traverse back to the mounted-on filesystem
2719 2719 */
2720 2720 cs->vp = untraverse(cs->vp);
2721 2721
2722 2722 /*
2723 2723 * Set the different_export flag so we remember
2724 2724 * to pick up a new exportinfo entry for
2725 2725 * this new filesystem.
2726 2726 */
2727 2727 different_export = 1;
2728 2728 } else {
2729 2729
2730 2730 /*
2731 2731 * If dotdotting above an export point then set
2732 2732 * the different_export to get new export info.
2733 2733 */
2734 2734 different_export = nfs_exported(cs->exi, cs->vp);
2735 2735 }
2736 2736 }
2737 2737
2738 2738 error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2739 2739 NULL, NULL, NULL);
2740 2740 if (error)
2741 2741 return (puterrno4(error));
2742 2742
2743 2743 /*
2744 2744 * If the vnode is in a pseudo filesystem, check whether it is visible.
2745 2745 *
2746 2746 * XXX if the vnode is a symlink and it is not visible in
2747 2747 * a pseudo filesystem, return ENOENT (not following symlink).
2748 2748 * V4 client can not mount such symlink. This is a regression
2749 2749 * from V2/V3.
2750 2750 *
2751 2751 * In the same exported filesystem, if the security flavor used
2752 2752 * is not an explicitly shared flavor, limit the view to the visible
2753 2753 * list entries only. This is not a WRONGSEC case because it's already
2754 2754 * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2755 2755 */
2756 2756 if (!different_export &&
2757 2757 (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2758 2758 cs->access & CS_ACCESS_LIMITED)) {
2759 2759 if (! nfs_visible(cs->exi, vp, &different_export)) {
2760 2760 VN_RELE(vp);
2761 2761 return (puterrno4(ENOENT));
2762 2762 }
2763 2763 }
2764 2764
2765 2765 /*
2766 2766 * If it's a mountpoint, then traverse it.
2767 2767 */
2768 2768 if (vn_ismntpt(vp)) {
2769 2769 pre_exi = cs->exi; /* save pre-traversed exportinfo */
2770 2770 pre_tvp = vp; /* save pre-traversed vnode */
2771 2771
2772 2772 /*
2773 2773 * hold pre_tvp to counteract rele by traverse. We will
2774 2774 * need pre_tvp below if checkexport4 fails
2775 2775 */
2776 2776 VN_HOLD(pre_tvp);
2777 2777 if ((error = traverse(&vp)) != 0) {
2778 2778 VN_RELE(vp);
2779 2779 VN_RELE(pre_tvp);
2780 2780 return (puterrno4(error));
2781 2781 }
2782 2782 different_export = 1;
2783 2783 } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2784 2784 /*
2785 2785 * The vfsp comparison is to handle the case where
2786 2786 * a LOFS mount is shared. lo_lookup traverses mount points,
2787 2787 * and NFS is unaware of local fs transistions because
2788 2788 * v_vfsmountedhere isn't set. For this special LOFS case,
2789 2789 * the dir and the obj returned by lookup will have different
2790 2790 * vfs ptrs.
2791 2791 */
2792 2792 different_export = 1;
2793 2793 }
2794 2794
2795 2795 if (different_export) {
2796 2796
2797 2797 bzero(&fid, sizeof (fid));
2798 2798 fid.fid_len = MAXFIDSZ;
2799 2799 error = vop_fid_pseudo(vp, &fid);
2800 2800 if (error) {
2801 2801 VN_RELE(vp);
2802 2802 if (pre_tvp)
2803 2803 VN_RELE(pre_tvp);
2804 2804 return (puterrno4(error));
2805 2805 }
2806 2806
2807 2807 if (dotdot)
2808 2808 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2809 2809 else
2810 2810 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2811 2811
2812 2812 if (exi == NULL) {
2813 2813 if (pre_tvp) {
2814 2814 /*
2815 2815 * If this vnode is a mounted-on vnode,
2816 2816 * but the mounted-on file system is not
2817 2817 * exported, send back the filehandle for
2818 2818 * the mounted-on vnode, not the root of
2819 2819 * the mounted-on file system.
2820 2820 */
2821 2821 VN_RELE(vp);
2822 2822 vp = pre_tvp;
2823 2823 exi = pre_exi;
2824 2824 } else {
2825 2825 VN_RELE(vp);
2826 2826 return (puterrno4(EACCES));
2827 2827 }
2828 2828 } else if (pre_tvp) {
2829 2829 /* we're done with pre_tvp now. release extra hold */
2830 2830 VN_RELE(pre_tvp);
2831 2831 }
2832 2832
2833 2833 cs->exi = exi;
2834 2834
2835 2835 /*
2836 2836 * Now we do a checkauth4. The reason is that
2837 2837 * this client/user may not have access to the new
2838 2838 * exported file system, and if they do,
2839 2839 * the client/user may be mapped to a different uid.
2840 2840 *
2841 2841 * We start with a new cr, because the checkauth4 done
2842 2842 * in the PUT*FH operation over wrote the cred's uid,
2843 2843 * gid, etc, and we want the real thing before calling
2844 2844 * checkauth4()
2845 2845 */
2846 2846 crfree(cs->cr);
2847 2847 cs->cr = crdup(cs->basecr);
2848 2848
2849 2849 oldvp = cs->vp;
2850 2850 cs->vp = vp;
2851 2851 is_newvp = TRUE;
2852 2852
2853 2853 stat = call_checkauth4(cs, req);
2854 2854 if (stat != NFS4_OK) {
2855 2855 VN_RELE(cs->vp);
2856 2856 cs->vp = oldvp;
2857 2857 return (stat);
2858 2858 }
2859 2859 }
2860 2860
2861 2861 /*
2862 2862 * After various NFS checks, do a label check on the path
2863 2863 * component. The label on this path should either be the
2864 2864 * global zone's label or a zone's label. We are only
2865 2865 * interested in the zone's label because exported files
2866 2866 * in global zone is accessible (though read-only) to
2867 2867 * clients. The exportability/visibility check is already
2868 2868 * done before reaching this code.
2869 2869 */
2870 2870 if (is_system_labeled()) {
2871 2871 bslabel_t *clabel;
2872 2872
2873 2873 ASSERT(req->rq_label != NULL);
2874 2874 clabel = req->rq_label;
2875 2875 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
2876 2876 "got client label from request(1)", struct svc_req *, req);
2877 2877
2878 2878 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2879 2879 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2880 2880 cs->exi)) {
2881 2881 error = EACCES;
2882 2882 goto err_out;
2883 2883 }
2884 2884 } else {
2885 2885 /*
2886 2886 * We grant access to admin_low label clients
2887 2887 * only if the client is trusted, i.e. also
2888 2888 * running Solaris Trusted Extension.
2889 2889 */
2890 2890 struct sockaddr *ca;
2891 2891 int addr_type;
2892 2892 void *ipaddr;
2893 2893 tsol_tpc_t *tp;
2894 2894
2895 2895 ca = (struct sockaddr *)svc_getrpccaller(
2896 2896 req->rq_xprt)->buf;
2897 2897 if (ca->sa_family == AF_INET) {
2898 2898 addr_type = IPV4_VERSION;
2899 2899 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
2900 2900 } else if (ca->sa_family == AF_INET6) {
2901 2901 addr_type = IPV6_VERSION;
2902 2902 ipaddr = &((struct sockaddr_in6 *)
2903 2903 ca)->sin6_addr;
2904 2904 }
2905 2905 tp = find_tpc(ipaddr, addr_type, B_FALSE);
2906 2906 if (tp == NULL || tp->tpc_tp.tp_doi !=
2907 2907 l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
2908 2908 SUN_CIPSO) {
2909 2909 if (tp != NULL)
2910 2910 TPC_RELE(tp);
2911 2911 error = EACCES;
2912 2912 goto err_out;
2913 2913 }
2914 2914 TPC_RELE(tp);
2915 2915 }
2916 2916 }
2917 2917
2918 2918 error = makefh4(&cs->fh, vp, cs->exi);
2919 2919
2920 2920 err_out:
2921 2921 if (error) {
2922 2922 if (is_newvp) {
2923 2923 VN_RELE(cs->vp);
2924 2924 cs->vp = oldvp;
2925 2925 } else
2926 2926 VN_RELE(vp);
2927 2927 return (puterrno4(error));
2928 2928 }
2929 2929
2930 2930 if (!is_newvp) {
2931 2931 if (cs->vp)
2932 2932 VN_RELE(cs->vp);
2933 2933 cs->vp = vp;
2934 2934 } else if (oldvp)
2935 2935 VN_RELE(oldvp);
2936 2936
2937 2937 /*
2938 2938 * if did lookup on attrdir and didn't lookup .., set named
2939 2939 * attr fh flag
2940 2940 */
2941 2941 if (attrdir && ! dotdot)
2942 2942 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2943 2943
2944 2944 /* Assume false for now, open proc will set this */
2945 2945 cs->mandlock = FALSE;
2946 2946
2947 2947 return (NFS4_OK);
2948 2948 }
2949 2949
2950 2950 /* ARGSUSED */
2951 2951 static void
2952 2952 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2953 2953 struct compound_state *cs)
2954 2954 {
2955 2955 LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2956 2956 LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2957 2957 char *nm;
2958 2958 uint_t len;
2959 2959 struct sockaddr *ca;
2960 2960 char *name = NULL;
2961 2961 nfsstat4 status;
2962 2962
2963 2963 DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2964 2964 LOOKUP4args *, args);
2965 2965
2966 2966 if (cs->vp == NULL) {
2967 2967 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2968 2968 goto out;
2969 2969 }
2970 2970
2971 2971 if (cs->vp->v_type == VLNK) {
2972 2972 *cs->statusp = resp->status = NFS4ERR_SYMLINK;
2973 2973 goto out;
2974 2974 }
2975 2975
2976 2976 if (cs->vp->v_type != VDIR) {
2977 2977 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2978 2978 goto out;
2979 2979 }
2980 2980
2981 2981 status = utf8_dir_verify(&args->objname);
2982 2982 if (status != NFS4_OK) {
2983 2983 *cs->statusp = resp->status = status;
2984 2984 goto out;
2985 2985 }
2986 2986
2987 2987 nm = utf8_to_str(&args->objname, &len, NULL);
2988 2988 if (nm == NULL) {
2989 2989 *cs->statusp = resp->status = NFS4ERR_INVAL;
2990 2990 goto out;
2991 2991 }
2992 2992
2993 2993 if (len > MAXNAMELEN) {
2994 2994 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2995 2995 kmem_free(nm, len);
2996 2996 goto out;
2997 2997 }
2998 2998
2999 2999 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3000 3000 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
3001 3001 MAXPATHLEN + 1);
3002 3002
3003 3003 if (name == NULL) {
3004 3004 *cs->statusp = resp->status = NFS4ERR_INVAL;
3005 3005 kmem_free(nm, len);
3006 3006 goto out;
3007 3007 }
3008 3008
3009 3009 *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
3010 3010
3011 3011 if (name != nm)
3012 3012 kmem_free(name, MAXPATHLEN + 1);
3013 3013 kmem_free(nm, len);
3014 3014
3015 3015 out:
3016 3016 DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
3017 3017 LOOKUP4res *, resp);
3018 3018 }
3019 3019
3020 3020 /* ARGSUSED */
3021 3021 static void
3022 3022 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3023 3023 struct compound_state *cs)
3024 3024 {
3025 3025 LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
3026 3026
3027 3027 DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
3028 3028
3029 3029 if (cs->vp == NULL) {
3030 3030 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3031 3031 goto out;
3032 3032 }
3033 3033
3034 3034 if (cs->vp->v_type != VDIR) {
3035 3035 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
3036 3036 goto out;
3037 3037 }
3038 3038
3039 3039 *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
3040 3040
3041 3041 /*
3042 3042 * From NFSV4 Specification, LOOKUPP should not check for
3043 3043 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
3044 3044 */
3045 3045 if (resp->status == NFS4ERR_WRONGSEC) {
3046 3046 *cs->statusp = resp->status = NFS4_OK;
3047 3047 }
3048 3048
3049 3049 out:
3050 3050 DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
3051 3051 LOOKUPP4res *, resp);
3052 3052 }
3053 3053
3054 3054
3055 3055 /*ARGSUSED2*/
3056 3056 static void
3057 3057 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3058 3058 struct compound_state *cs)
3059 3059 {
3060 3060 OPENATTR4args *args = &argop->nfs_argop4_u.opopenattr;
3061 3061 OPENATTR4res *resp = &resop->nfs_resop4_u.opopenattr;
3062 3062 vnode_t *avp = NULL;
3063 3063 int lookup_flags = LOOKUP_XATTR, error;
3064 3064 int exp_ro = 0;
3065 3065
3066 3066 DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3067 3067 OPENATTR4args *, args);
3068 3068
3069 3069 if (cs->vp == NULL) {
3070 3070 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3071 3071 goto out;
3072 3072 }
3073 3073
3074 3074 if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3075 3075 !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3076 3076 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3077 3077 goto out;
3078 3078 }
3079 3079
3080 3080 /*
3081 3081 * If file system supports passing ACE mask to VOP_ACCESS then
3082 3082 * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3083 3083 */
3084 3084
3085 3085 if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3086 3086 error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3087 3087 V_ACE_MASK, cs->cr, NULL);
3088 3088 else
3089 3089 error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3090 3090 (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3091 3091 (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3092 3092
3093 3093 if (error) {
3094 3094 *cs->statusp = resp->status = puterrno4(EACCES);
3095 3095 goto out;
3096 3096 }
3097 3097
3098 3098 /*
3099 3099 * The CREATE_XATTR_DIR VOP flag cannot be specified if
3100 3100 * the file system is exported read-only -- regardless of
3101 3101 * createdir flag. Otherwise the attrdir would be created
3102 3102 * (assuming server fs isn't mounted readonly locally). If
3103 3103 * VOP_LOOKUP returns ENOENT in this case, the error will
3104 3104 * be translated into EROFS. ENOSYS is mapped to ENOTSUP
3105 3105 * because specfs has no VOP_LOOKUP op, so the macro would
3106 3106 * return ENOSYS. EINVAL is returned by all (current)
3107 3107 * Solaris file system implementations when any of their
3108 3108 * restrictions are violated (xattr(dir) can't have xattrdir).
3109 3109 * Returning NOTSUPP is more appropriate in this case
3110 3110 * because the object will never be able to have an attrdir.
3111 3111 */
3112 3112 if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3113 3113 lookup_flags |= CREATE_XATTR_DIR;
3114 3114
3115 3115 error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3116 3116 NULL, NULL, NULL);
3117 3117
3118 3118 if (error) {
3119 3119 if (error == ENOENT && args->createdir && exp_ro)
3120 3120 *cs->statusp = resp->status = puterrno4(EROFS);
3121 3121 else if (error == EINVAL || error == ENOSYS)
3122 3122 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3123 3123 else
3124 3124 *cs->statusp = resp->status = puterrno4(error);
3125 3125 goto out;
3126 3126 }
3127 3127
3128 3128 ASSERT(avp->v_flag & V_XATTRDIR);
3129 3129
3130 3130 error = makefh4(&cs->fh, avp, cs->exi);
3131 3131
3132 3132 if (error) {
3133 3133 VN_RELE(avp);
3134 3134 *cs->statusp = resp->status = puterrno4(error);
3135 3135 goto out;
3136 3136 }
3137 3137
3138 3138 VN_RELE(cs->vp);
3139 3139 cs->vp = avp;
3140 3140
3141 3141 /*
3142 3142 * There is no requirement for an attrdir fh flag
3143 3143 * because the attrdir has a vnode flag to distinguish
3144 3144 * it from regular (non-xattr) directories. The
3145 3145 * FH4_ATTRDIR flag is set for future sanity checks.
3146 3146 */
3147 3147 set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3148 3148 *cs->statusp = resp->status = NFS4_OK;
3149 3149
3150 3150 out:
3151 3151 DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3152 3152 OPENATTR4res *, resp);
3153 3153 }
3154 3154
3155 3155 static int
3156 3156 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3157 3157 caller_context_t *ct)
3158 3158 {
3159 3159 int error;
3160 3160 int i;
3161 3161 clock_t delaytime;
3162 3162
3163 3163 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3164 3164
3165 3165 /*
3166 3166 * Don't block on mandatory locks. If this routine returns
3167 3167 * EAGAIN, the caller should return NFS4ERR_LOCKED.
3168 3168 */
3169 3169 uio->uio_fmode = FNONBLOCK;
3170 3170
3171 3171 for (i = 0; i < rfs4_maxlock_tries; i++) {
3172 3172
3173 3173
3174 3174 if (direction == FREAD) {
3175 3175 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3176 3176 error = VOP_READ(vp, uio, ioflag, cred, ct);
3177 3177 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3178 3178 } else {
3179 3179 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3180 3180 error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3181 3181 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3182 3182 }
3183 3183
3184 3184 if (error != EAGAIN)
3185 3185 break;
3186 3186
3187 3187 if (i < rfs4_maxlock_tries - 1) {
3188 3188 delay(delaytime);
3189 3189 delaytime *= 2;
3190 3190 }
3191 3191 }
3192 3192
3193 3193 return (error);
3194 3194 }
3195 3195
3196 3196 /* ARGSUSED */
3197 3197 static void
3198 3198 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3199 3199 struct compound_state *cs)
3200 3200 {
3201 3201 READ4args *args = &argop->nfs_argop4_u.opread;
3202 3202 READ4res *resp = &resop->nfs_resop4_u.opread;
3203 3203 int error;
3204 3204 int verror;
3205 3205 vnode_t *vp;
3206 3206 struct vattr va;
3207 3207 struct iovec iov, *iovp = NULL;
3208 3208 int iovcnt;
3209 3209 struct uio uio;
3210 3210 u_offset_t offset;
3211 3211 bool_t *deleg = &cs->deleg;
3212 3212 nfsstat4 stat;
3213 3213 int in_crit = 0;
3214 3214 mblk_t *mp = NULL;
3215 3215 int alloc_err = 0;
3216 3216 int rdma_used = 0;
3217 3217 int loaned_buffers;
3218 3218 caller_context_t ct;
3219 3219 struct uio *uiop;
3220 3220
3221 3221 DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3222 3222 READ4args, args);
3223 3223
3224 3224 vp = cs->vp;
3225 3225 if (vp == NULL) {
3226 3226 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3227 3227 goto out;
3228 3228 }
3229 3229 if (cs->access == CS_ACCESS_DENIED) {
3230 3230 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3231 3231 goto out;
3232 3232 }
3233 3233
3234 3234 if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3235 3235 deleg, TRUE, &ct)) != NFS4_OK) {
3236 3236 *cs->statusp = resp->status = stat;
3237 3237 goto out;
3238 3238 }
3239 3239
3240 3240 /*
3241 3241 * Enter the critical region before calling VOP_RWLOCK
3242 3242 * to avoid a deadlock with write requests.
3243 3243 */
3244 3244 if (nbl_need_check(vp)) {
3245 3245 nbl_start_crit(vp, RW_READER);
3246 3246 in_crit = 1;
3247 3247 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3248 3248 &ct)) {
3249 3249 *cs->statusp = resp->status = NFS4ERR_LOCKED;
3250 3250 goto out;
3251 3251 }
3252 3252 }
3253 3253
3254 3254 if (args->wlist) {
3255 3255 if (args->count > clist_len(args->wlist)) {
3256 3256 *cs->statusp = resp->status = NFS4ERR_INVAL;
3257 3257 goto out;
3258 3258 }
3259 3259 rdma_used = 1;
3260 3260 }
3261 3261
3262 3262 /* use loaned buffers for TCP */
3263 3263 loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3264 3264
3265 3265 va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3266 3266 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3267 3267
3268 3268 /*
3269 3269 * If we can't get the attributes, then we can't do the
3270 3270 * right access checking. So, we'll fail the request.
3271 3271 */
3272 3272 if (verror) {
3273 3273 *cs->statusp = resp->status = puterrno4(verror);
3274 3274 goto out;
3275 3275 }
3276 3276
3277 3277 if (vp->v_type != VREG) {
3278 3278 *cs->statusp = resp->status =
3279 3279 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3280 3280 goto out;
3281 3281 }
3282 3282
3283 3283 if (crgetuid(cs->cr) != va.va_uid &&
3284 3284 (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3285 3285 (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3286 3286 *cs->statusp = resp->status = puterrno4(error);
3287 3287 goto out;
3288 3288 }
3289 3289
3290 3290 if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3291 3291 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3292 3292 goto out;
3293 3293 }
3294 3294
3295 3295 offset = args->offset;
3296 3296 if (offset >= va.va_size) {
3297 3297 *cs->statusp = resp->status = NFS4_OK;
3298 3298 resp->eof = TRUE;
3299 3299 resp->data_len = 0;
3300 3300 resp->data_val = NULL;
3301 3301 resp->mblk = NULL;
3302 3302 /* RDMA */
3303 3303 resp->wlist = args->wlist;
3304 3304 resp->wlist_len = resp->data_len;
3305 3305 *cs->statusp = resp->status = NFS4_OK;
3306 3306 if (resp->wlist)
3307 3307 clist_zero_len(resp->wlist);
3308 3308 goto out;
3309 3309 }
3310 3310
3311 3311 if (args->count == 0) {
3312 3312 *cs->statusp = resp->status = NFS4_OK;
3313 3313 resp->eof = FALSE;
3314 3314 resp->data_len = 0;
3315 3315 resp->data_val = NULL;
3316 3316 resp->mblk = NULL;
3317 3317 /* RDMA */
3318 3318 resp->wlist = args->wlist;
3319 3319 resp->wlist_len = resp->data_len;
3320 3320 if (resp->wlist)
3321 3321 clist_zero_len(resp->wlist);
3322 3322 goto out;
3323 3323 }
3324 3324
3325 3325 /*
3326 3326 * Do not allocate memory more than maximum allowed
3327 3327 * transfer size
3328 3328 */
3329 3329 if (args->count > rfs4_tsize(req))
3330 3330 args->count = rfs4_tsize(req);
3331 3331
3332 3332 if (loaned_buffers) {
3333 3333 uiop = (uio_t *)rfs_setup_xuio(vp);
3334 3334 ASSERT(uiop != NULL);
3335 3335 uiop->uio_segflg = UIO_SYSSPACE;
3336 3336 uiop->uio_loffset = args->offset;
3337 3337 uiop->uio_resid = args->count;
3338 3338
3339 3339 /* Jump to do the read if successful */
3340 3340 if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3341 3341 /*
3342 3342 * Need to hold the vnode until after VOP_RETZCBUF()
3343 3343 * is called.
3344 3344 */
3345 3345 VN_HOLD(vp);
3346 3346 goto doio_read;
3347 3347 }
3348 3348
3349 3349 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3350 3350 uiop->uio_loffset, int, uiop->uio_resid);
3351 3351
3352 3352 uiop->uio_extflg = 0;
3353 3353
3354 3354 /* failure to setup for zero copy */
3355 3355 rfs_free_xuio((void *)uiop);
3356 3356 loaned_buffers = 0;
3357 3357 }
3358 3358
3359 3359 /*
3360 3360 * If returning data via RDMA Write, then grab the chunk list. If we
3361 3361 * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3362 3362 */
3363 3363 if (rdma_used) {
3364 3364 mp = NULL;
3365 3365 (void) rdma_get_wchunk(req, &iov, args->wlist);
3366 3366 uio.uio_iov = &iov;
3367 3367 uio.uio_iovcnt = 1;
3368 3368 } else {
3369 3369 /*
3370 3370 * mp will contain the data to be sent out in the read reply.
3371 3371 * It will be freed after the reply has been sent.
3372 3372 */
3373 3373 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
3374 3374 ASSERT(mp != NULL);
3375 3375 ASSERT(alloc_err == 0);
3376 3376 uio.uio_iov = iovp;
3377 3377 uio.uio_iovcnt = iovcnt;
3378 3378 }
3379 3379
3380 3380 uio.uio_segflg = UIO_SYSSPACE;
3381 3381 uio.uio_extflg = UIO_COPY_CACHED;
3382 3382 uio.uio_loffset = args->offset;
3383 3383 uio.uio_resid = args->count;
3384 3384 uiop = &uio;
3385 3385
3386 3386 doio_read:
3387 3387 error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3388 3388
3389 3389 va.va_mask = AT_SIZE;
3390 3390 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3391 3391
3392 3392 if (error) {
3393 3393 if (mp)
3394 3394 freemsg(mp);
3395 3395 *cs->statusp = resp->status = puterrno4(error);
3396 3396 goto out;
3397 3397 }
3398 3398
3399 3399 /* make mblk using zc buffers */
3400 3400 if (loaned_buffers) {
3401 3401 mp = uio_to_mblk(uiop);
3402 3402 ASSERT(mp != NULL);
3403 3403 }
3404 3404
3405 3405 *cs->statusp = resp->status = NFS4_OK;
3406 3406
3407 3407 ASSERT(uiop->uio_resid >= 0);
3408 3408 resp->data_len = args->count - uiop->uio_resid;
3409 3409 if (mp) {
3410 3410 resp->data_val = (char *)mp->b_datap->db_base;
3411 3411 rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3412 3412 } else {
3413 3413 resp->data_val = (caddr_t)iov.iov_base;
3414 3414 }
3415 3415
3416 3416 resp->mblk = mp;
3417 3417
3418 3418 if (!verror && offset + resp->data_len == va.va_size)
3419 3419 resp->eof = TRUE;
3420 3420 else
3421 3421 resp->eof = FALSE;
3422 3422
3423 3423 if (rdma_used) {
3424 3424 if (!rdma_setup_read_data4(args, resp)) {
3425 3425 *cs->statusp = resp->status = NFS4ERR_INVAL;
3426 3426 }
3427 3427 } else {
3428 3428 resp->wlist = NULL;
3429 3429 }
3430 3430
3431 3431 out:
3432 3432 if (in_crit)
3433 3433 nbl_end_crit(vp);
3434 3434
3435 3435 if (iovp != NULL)
3436 3436 kmem_free(iovp, iovcnt * sizeof (struct iovec));
3437 3437
3438 3438 DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3439 3439 READ4res *, resp);
3440 3440 }
3441 3441
3442 3442 static void
3443 3443 rfs4_op_read_free(nfs_resop4 *resop)
3444 3444 {
3445 3445 READ4res *resp = &resop->nfs_resop4_u.opread;
3446 3446
3447 3447 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3448 3448 freemsg(resp->mblk);
3449 3449 resp->mblk = NULL;
3450 3450 resp->data_val = NULL;
3451 3451 resp->data_len = 0;
3452 3452 }
3453 3453 }
3454 3454
3455 3455 static void
3456 3456 rfs4_op_readdir_free(nfs_resop4 * resop)
3457 3457 {
3458 3458 READDIR4res *resp = &resop->nfs_resop4_u.opreaddir;
3459 3459
3460 3460 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3461 3461 freeb(resp->mblk);
3462 3462 resp->mblk = NULL;
3463 3463 resp->data_len = 0;
3464 3464 }
3465 3465 }
3466 3466
3467 3467
3468 3468 /* ARGSUSED */
3469 3469 static void
3470 3470 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3471 3471 struct compound_state *cs)
3472 3472 {
3473 3473 PUTPUBFH4res *resp = &resop->nfs_resop4_u.opputpubfh;
3474 3474 int error;
3475 3475 vnode_t *vp;
3476 3476 struct exportinfo *exi, *sav_exi;
3477 3477 nfs_fh4_fmt_t *fh_fmtp;
3478 3478 nfs_export_t *ne = nfs_get_export();
3479 3479
3480 3480 DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3481 3481
3482 3482 if (cs->vp) {
3483 3483 VN_RELE(cs->vp);
3484 3484 cs->vp = NULL;
3485 3485 }
3486 3486
3487 3487 if (cs->cr)
3488 3488 crfree(cs->cr);
3489 3489
3490 3490 cs->cr = crdup(cs->basecr);
3491 3491
3492 3492 vp = ne->exi_public->exi_vp;
3493 3493 if (vp == NULL) {
3494 3494 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3495 3495 goto out;
3496 3496 }
3497 3497
3498 3498 error = makefh4(&cs->fh, vp, ne->exi_public);
3499 3499 if (error != 0) {
3500 3500 *cs->statusp = resp->status = puterrno4(error);
3501 3501 goto out;
3502 3502 }
3503 3503 sav_exi = cs->exi;
3504 3504 if (ne->exi_public == ne->exi_root) {
3505 3505 /*
3506 3506 * No filesystem is actually shared public, so we default
3507 3507 * to exi_root. In this case, we must check whether root
3508 3508 * is exported.
3509 3509 */
3510 3510 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3511 3511
3512 3512 /*
3513 3513 * if root filesystem is exported, the exportinfo struct that we
3514 3514 * should use is what checkexport4 returns, because root_exi is
3515 3515 * actually a mostly empty struct.
3516 3516 */
3517 3517 exi = checkexport4(&fh_fmtp->fh4_fsid,
3518 3518 (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3519 3519 cs->exi = ((exi != NULL) ? exi : ne->exi_public);
3520 3520 } else {
3521 3521 /*
3522 3522 * it's a properly shared filesystem
3523 3523 */
3524 3524 cs->exi = ne->exi_public;
3525 3525 }
3526 3526
3527 3527 if (is_system_labeled()) {
3528 3528 bslabel_t *clabel;
3529 3529
3530 3530 ASSERT(req->rq_label != NULL);
3531 3531 clabel = req->rq_label;
3532 3532 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3533 3533 "got client label from request(1)",
3534 3534 struct svc_req *, req);
3535 3535 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3536 3536 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3537 3537 cs->exi)) {
3538 3538 *cs->statusp = resp->status =
3539 3539 NFS4ERR_SERVERFAULT;
3540 3540 goto out;
3541 3541 }
3542 3542 }
3543 3543 }
3544 3544
3545 3545 VN_HOLD(vp);
3546 3546 cs->vp = vp;
3547 3547
3548 3548 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3549 3549 VN_RELE(cs->vp);
3550 3550 cs->vp = NULL;
3551 3551 cs->exi = sav_exi;
3552 3552 goto out;
3553 3553 }
3554 3554
3555 3555 *cs->statusp = resp->status = NFS4_OK;
3556 3556 out:
3557 3557 DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3558 3558 PUTPUBFH4res *, resp);
3559 3559 }
3560 3560
3561 3561 /*
3562 3562 * XXX - issue with put*fh operations. Suppose /export/home is exported.
3563 3563 * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3564 3564 * or joe have restrictive search permissions, then we shouldn't let
3565 3565 * the client get a file handle. This is easy to enforce. However, we
3566 3566 * don't know what security flavor should be used until we resolve the
3567 3567 * path name. Another complication is uid mapping. If root is
3568 3568 * the user, then it will be mapped to the anonymous user by default,
3569 3569 * but we won't know that till we've resolved the path name. And we won't
3570 3570 * know what the anonymous user is.
3571 3571 * Luckily, SECINFO is specified to take a full filename.
3572 3572 * So what we will have to do in rfs4_op_lookup is check that flavor of
3573 3573 * the target object matches that of the request, and if root was the
3574 3574 * caller, check for the root= and anon= options, and if necessary,
3575 3575 * repeat the lookup using the right cred_t. But that's not done yet.
3576 3576 */
3577 3577 /* ARGSUSED */
3578 3578 static void
3579 3579 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3580 3580 struct compound_state *cs)
3581 3581 {
3582 3582 PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3583 3583 PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3584 3584 nfs_fh4_fmt_t *fh_fmtp;
3585 3585
3586 3586 DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3587 3587 PUTFH4args *, args);
3588 3588
3589 3589 if (cs->vp) {
3590 3590 VN_RELE(cs->vp);
3591 3591 cs->vp = NULL;
3592 3592 }
3593 3593
3594 3594 if (cs->cr) {
3595 3595 crfree(cs->cr);
3596 3596 cs->cr = NULL;
3597 3597 }
3598 3598
3599 3599
3600 3600 if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3601 3601 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3602 3602 goto out;
3603 3603 }
3604 3604
3605 3605 fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3606 3606 cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3607 3607 NULL);
3608 3608
3609 3609 if (cs->exi == NULL) {
3610 3610 *cs->statusp = resp->status = NFS4ERR_STALE;
3611 3611 goto out;
3612 3612 }
3613 3613
3614 3614 cs->cr = crdup(cs->basecr);
3615 3615
3616 3616 ASSERT(cs->cr != NULL);
3617 3617
3618 3618 if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3619 3619 *cs->statusp = resp->status;
3620 3620 goto out;
3621 3621 }
3622 3622
3623 3623 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3624 3624 VN_RELE(cs->vp);
3625 3625 cs->vp = NULL;
3626 3626 goto out;
3627 3627 }
3628 3628
3629 3629 nfs_fh4_copy(&args->object, &cs->fh);
3630 3630 *cs->statusp = resp->status = NFS4_OK;
3631 3631 cs->deleg = FALSE;
3632 3632
3633 3633 out:
3634 3634 DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3635 3635 PUTFH4res *, resp);
3636 3636 }
3637 3637
3638 3638 /* ARGSUSED */
3639 3639 static void
3640 3640 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3641 3641 struct compound_state *cs)
3642 3642 {
3643 3643 PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3644 3644 int error;
3645 3645 fid_t fid;
3646 3646 struct exportinfo *exi, *sav_exi;
3647 3647
3648 3648 DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3649 3649
3650 3650 if (cs->vp) {
3651 3651 VN_RELE(cs->vp);
3652 3652 cs->vp = NULL;
3653 3653 }
3654 3654
3655 3655 if (cs->cr)
3656 3656 crfree(cs->cr);
3657 3657
3658 3658 cs->cr = crdup(cs->basecr);
3659 3659
3660 3660 /*
3661 3661 * Using rootdir, the system root vnode,
3662 3662 * get its fid.
3663 3663 */
3664 3664 bzero(&fid, sizeof (fid));
3665 3665 fid.fid_len = MAXFIDSZ;
3666 3666 error = vop_fid_pseudo(ZONE_ROOTVP(), &fid);
3667 3667 if (error != 0) {
3668 3668 *cs->statusp = resp->status = puterrno4(error);
3669 3669 goto out;
3670 3670 }
3671 3671
3672 3672 /*
3673 3673 * Then use the root fsid & fid it to find out if it's exported
3674 3674 *
3675 3675 * If the server root isn't exported directly, then
3676 3676 * it should at least be a pseudo export based on
3677 3677 * one or more exports further down in the server's
3678 3678 * file tree.
3679 3679 */
3680 3680 exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL);
3681 3681 if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3682 3682 NFS4_DEBUG(rfs4_debug,
3683 3683 (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3684 3684 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3685 3685 goto out;
3686 3686 }
3687 3687
3688 3688 /*
3689 3689 * Now make a filehandle based on the root
3690 3690 * export and root vnode.
3691 3691 */
3692 3692 error = makefh4(&cs->fh, ZONE_ROOTVP(), exi);
3693 3693 if (error != 0) {
3694 3694 *cs->statusp = resp->status = puterrno4(error);
3695 3695 goto out;
3696 3696 }
3697 3697
3698 3698 sav_exi = cs->exi;
3699 3699 cs->exi = exi;
3700 3700
3701 3701 VN_HOLD(ZONE_ROOTVP());
3702 3702 cs->vp = ZONE_ROOTVP();
3703 3703
3704 3704 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3705 3705 VN_RELE(cs->vp);
3706 3706 cs->vp = NULL;
3707 3707 cs->exi = sav_exi;
3708 3708 goto out;
3709 3709 }
3710 3710
3711 3711 *cs->statusp = resp->status = NFS4_OK;
3712 3712 cs->deleg = FALSE;
3713 3713 out:
3714 3714 DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3715 3715 PUTROOTFH4res *, resp);
3716 3716 }
3717 3717
3718 3718 /*
3719 3719 * readlink: args: CURRENT_FH.
3720 3720 * res: status. If success - CURRENT_FH unchanged, return linktext.
3721 3721 */
3722 3722
3723 3723 /* ARGSUSED */
3724 3724 static void
3725 3725 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3726 3726 struct compound_state *cs)
3727 3727 {
3728 3728 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3729 3729 int error;
3730 3730 vnode_t *vp;
3731 3731 struct iovec iov;
3732 3732 struct vattr va;
3733 3733 struct uio uio;
3734 3734 char *data;
3735 3735 struct sockaddr *ca;
3736 3736 char *name = NULL;
3737 3737 int is_referral;
3738 3738
3739 3739 DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3740 3740
3741 3741 /* CURRENT_FH: directory */
3742 3742 vp = cs->vp;
3743 3743 if (vp == NULL) {
3744 3744 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3745 3745 goto out;
3746 3746 }
3747 3747
3748 3748 if (cs->access == CS_ACCESS_DENIED) {
3749 3749 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3750 3750 goto out;
3751 3751 }
3752 3752
3753 3753 /* Is it a referral? */
3754 3754 if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3755 3755
3756 3756 is_referral = 1;
3757 3757
3758 3758 } else {
3759 3759
3760 3760 is_referral = 0;
3761 3761
3762 3762 if (vp->v_type == VDIR) {
3763 3763 *cs->statusp = resp->status = NFS4ERR_ISDIR;
3764 3764 goto out;
3765 3765 }
3766 3766
3767 3767 if (vp->v_type != VLNK) {
3768 3768 *cs->statusp = resp->status = NFS4ERR_INVAL;
3769 3769 goto out;
3770 3770 }
3771 3771
3772 3772 }
3773 3773
3774 3774 va.va_mask = AT_MODE;
3775 3775 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3776 3776 if (error) {
3777 3777 *cs->statusp = resp->status = puterrno4(error);
3778 3778 goto out;
3779 3779 }
3780 3780
3781 3781 if (MANDLOCK(vp, va.va_mode)) {
3782 3782 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3783 3783 goto out;
3784 3784 }
3785 3785
3786 3786 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3787 3787
3788 3788 if (is_referral) {
3789 3789 char *s;
3790 3790 size_t strsz;
3791 3791
3792 3792 /* Get an artificial symlink based on a referral */
3793 3793 s = build_symlink(vp, cs->cr, &strsz);
3794 3794 global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3795 3795 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3796 3796 vnode_t *, vp, char *, s);
3797 3797 if (s == NULL)
3798 3798 error = EINVAL;
3799 3799 else {
3800 3800 error = 0;
3801 3801 (void) strlcpy(data, s, MAXPATHLEN + 1);
3802 3802 kmem_free(s, strsz);
3803 3803 }
3804 3804
3805 3805 } else {
3806 3806
3807 3807 iov.iov_base = data;
3808 3808 iov.iov_len = MAXPATHLEN;
3809 3809 uio.uio_iov = &iov;
3810 3810 uio.uio_iovcnt = 1;
3811 3811 uio.uio_segflg = UIO_SYSSPACE;
3812 3812 uio.uio_extflg = UIO_COPY_CACHED;
3813 3813 uio.uio_loffset = 0;
3814 3814 uio.uio_resid = MAXPATHLEN;
3815 3815
3816 3816 error = VOP_READLINK(vp, &uio, cs->cr, NULL);
3817 3817
3818 3818 if (!error)
3819 3819 *(data + MAXPATHLEN - uio.uio_resid) = '\0';
3820 3820 }
3821 3821
3822 3822 if (error) {
3823 3823 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3824 3824 *cs->statusp = resp->status = puterrno4(error);
3825 3825 goto out;
3826 3826 }
3827 3827
3828 3828 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3829 3829 name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3830 3830 MAXPATHLEN + 1);
3831 3831
3832 3832 if (name == NULL) {
3833 3833 /*
3834 3834 * Even though the conversion failed, we return
3835 3835 * something. We just don't translate it.
3836 3836 */
3837 3837 name = data;
3838 3838 }
3839 3839
3840 3840 /*
3841 3841 * treat link name as data
3842 3842 */
3843 3843 (void) str_to_utf8(name, (utf8string *)&resp->link);
3844 3844
3845 3845 if (name != data)
3846 3846 kmem_free(name, MAXPATHLEN + 1);
3847 3847 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3848 3848 *cs->statusp = resp->status = NFS4_OK;
3849 3849
3850 3850 out:
3851 3851 DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3852 3852 READLINK4res *, resp);
3853 3853 }
3854 3854
3855 3855 static void
3856 3856 rfs4_op_readlink_free(nfs_resop4 *resop)
3857 3857 {
3858 3858 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3859 3859 utf8string *symlink = (utf8string *)&resp->link;
3860 3860
3861 3861 if (symlink->utf8string_val) {
3862 3862 UTF8STRING_FREE(*symlink)
3863 3863 }
3864 3864 }
3865 3865
3866 3866 /*
3867 3867 * release_lockowner:
3868 3868 * Release any state associated with the supplied
3869 3869 * lockowner. Note if any lo_state is holding locks we will not
3870 3870 * rele that lo_state and thus the lockowner will not be destroyed.
3871 3871 * A client using lock after the lock owner stateid has been released
3872 3872 * will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3873 3873 * to reissue the lock with new_lock_owner set to TRUE.
3874 3874 * args: lock_owner
3875 3875 * res: status
3876 3876 */
3877 3877 /* ARGSUSED */
3878 3878 static void
3879 3879 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3880 3880 struct svc_req *req, struct compound_state *cs)
3881 3881 {
3882 3882 RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3883 3883 RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3884 3884 rfs4_lockowner_t *lo;
3885 3885 rfs4_openowner_t *oo;
3886 3886 rfs4_state_t *sp;
3887 3887 rfs4_lo_state_t *lsp;
3888 3888 rfs4_client_t *cp;
3889 3889 bool_t create = FALSE;
3890 3890 locklist_t *llist;
3891 3891 sysid_t sysid;
3892 3892
3893 3893 DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3894 3894 cs, RELEASE_LOCKOWNER4args *, ap);
3895 3895
3896 3896 /* Make sure there is a clientid around for this request */
3897 3897 cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3898 3898
3899 3899 if (cp == NULL) {
3900 3900 *cs->statusp = resp->status =
3901 3901 rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3902 3902 goto out;
3903 3903 }
3904 3904 rfs4_client_rele(cp);
3905 3905
3906 3906 lo = rfs4_findlockowner(&ap->lock_owner, &create);
3907 3907 if (lo == NULL) {
3908 3908 *cs->statusp = resp->status = NFS4_OK;
3909 3909 goto out;
3910 3910 }
3911 3911 ASSERT(lo->rl_client != NULL);
3912 3912
3913 3913 /*
3914 3914 * Check for EXPIRED client. If so will reap state with in a lease
3915 3915 * period or on next set_clientid_confirm step
3916 3916 */
3917 3917 if (rfs4_lease_expired(lo->rl_client)) {
3918 3918 rfs4_lockowner_rele(lo);
3919 3919 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
3920 3920 goto out;
3921 3921 }
3922 3922
3923 3923 /*
3924 3924 * If no sysid has been assigned, then no locks exist; just return.
3925 3925 */
3926 3926 rfs4_dbe_lock(lo->rl_client->rc_dbe);
3927 3927 if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
3928 3928 rfs4_lockowner_rele(lo);
3929 3929 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3930 3930 goto out;
3931 3931 }
3932 3932
3933 3933 sysid = lo->rl_client->rc_sysidt;
3934 3934 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3935 3935
3936 3936 /*
3937 3937 * Mark the lockowner invalid.
3938 3938 */
3939 3939 rfs4_dbe_hide(lo->rl_dbe);
3940 3940
3941 3941 /*
3942 3942 * sysid-pid pair should now not be used since the lockowner is
3943 3943 * invalid. If the client were to instantiate the lockowner again
3944 3944 * it would be assigned a new pid. Thus we can get the list of
3945 3945 * current locks.
3946 3946 */
3947 3947
3948 3948 llist = flk_get_active_locks(sysid, lo->rl_pid);
3949 3949 /* If we are still holding locks fail */
3950 3950 if (llist != NULL) {
3951 3951
3952 3952 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3953 3953
3954 3954 flk_free_locklist(llist);
3955 3955 /*
3956 3956 * We need to unhide the lockowner so the client can
3957 3957 * try it again. The bad thing here is if the client
3958 3958 * has a logic error that took it here in the first place
3959 3959 * they probably have lost accounting of the locks that it
3960 3960 * is holding. So we may have dangling state until the
3961 3961 * open owner state is reaped via close. One scenario
3962 3962 * that could possibly occur is that the client has
3963 3963 * sent the unlock request(s) in separate threads
3964 3964 * and has not waited for the replies before sending the
3965 3965 * RELEASE_LOCKOWNER request. Presumably, it would expect
3966 3966 * and deal appropriately with NFS4ERR_LOCKS_HELD, by
3967 3967 * reissuing the request.
3968 3968 */
3969 3969 rfs4_dbe_unhide(lo->rl_dbe);
3970 3970 rfs4_lockowner_rele(lo);
3971 3971 goto out;
3972 3972 }
3973 3973
3974 3974 /*
3975 3975 * For the corresponding client we need to check each open
3976 3976 * owner for any opens that have lockowner state associated
3977 3977 * with this lockowner.
3978 3978 */
3979 3979
3980 3980 rfs4_dbe_lock(lo->rl_client->rc_dbe);
3981 3981 for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
3982 3982 oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
3983 3983
3984 3984 rfs4_dbe_lock(oo->ro_dbe);
3985 3985 for (sp = list_head(&oo->ro_statelist); sp != NULL;
3986 3986 sp = list_next(&oo->ro_statelist, sp)) {
3987 3987
3988 3988 rfs4_dbe_lock(sp->rs_dbe);
3989 3989 for (lsp = list_head(&sp->rs_lostatelist);
3990 3990 lsp != NULL;
3991 3991 lsp = list_next(&sp->rs_lostatelist, lsp)) {
3992 3992 if (lsp->rls_locker == lo) {
3993 3993 rfs4_dbe_lock(lsp->rls_dbe);
3994 3994 rfs4_dbe_invalidate(lsp->rls_dbe);
3995 3995 rfs4_dbe_unlock(lsp->rls_dbe);
3996 3996 }
3997 3997 }
3998 3998 rfs4_dbe_unlock(sp->rs_dbe);
3999 3999 }
4000 4000 rfs4_dbe_unlock(oo->ro_dbe);
4001 4001 }
4002 4002 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4003 4003
4004 4004 rfs4_lockowner_rele(lo);
4005 4005
4006 4006 *cs->statusp = resp->status = NFS4_OK;
4007 4007
4008 4008 out:
4009 4009 DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4010 4010 cs, RELEASE_LOCKOWNER4res *, resp);
4011 4011 }
4012 4012
4013 4013 /*
4014 4014 * short utility function to lookup a file and recall the delegation
4015 4015 */
4016 4016 static rfs4_file_t *
4017 4017 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4018 4018 int *lkup_error, cred_t *cr)
4019 4019 {
4020 4020 vnode_t *vp;
4021 4021 rfs4_file_t *fp = NULL;
4022 4022 bool_t fcreate = FALSE;
4023 4023 int error;
4024 4024
4025 4025 if (vpp)
4026 4026 *vpp = NULL;
4027 4027
4028 4028 if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4029 4029 NULL)) == 0) {
4030 4030 if (vp->v_type == VREG)
4031 4031 fp = rfs4_findfile(vp, NULL, &fcreate);
4032 4032 if (vpp)
4033 4033 *vpp = vp;
4034 4034 else
4035 4035 VN_RELE(vp);
4036 4036 }
4037 4037
4038 4038 if (lkup_error)
4039 4039 *lkup_error = error;
4040 4040
4041 4041 return (fp);
4042 4042 }
4043 4043
4044 4044 /*
4045 4045 * remove: args: CURRENT_FH: directory; name.
4046 4046 * res: status. If success - CURRENT_FH unchanged, return change_info
4047 4047 * for directory.
4048 4048 */
4049 4049 /* ARGSUSED */
4050 4050 static void
4051 4051 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4052 4052 struct compound_state *cs)
4053 4053 {
4054 4054 REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4055 4055 REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4056 4056 int error;
4057 4057 vnode_t *dvp, *vp;
4058 4058 struct vattr bdva, idva, adva;
4059 4059 char *nm;
4060 4060 uint_t len;
4061 4061 rfs4_file_t *fp;
4062 4062 int in_crit = 0;
4063 4063 bslabel_t *clabel;
4064 4064 struct sockaddr *ca;
4065 4065 char *name = NULL;
4066 4066 nfsstat4 status;
4067 4067
4068 4068 DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4069 4069 REMOVE4args *, args);
4070 4070
4071 4071 /* CURRENT_FH: directory */
4072 4072 dvp = cs->vp;
4073 4073 if (dvp == NULL) {
4074 4074 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4075 4075 goto out;
4076 4076 }
4077 4077
4078 4078 if (cs->access == CS_ACCESS_DENIED) {
4079 4079 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4080 4080 goto out;
4081 4081 }
4082 4082
4083 4083 /*
4084 4084 * If there is an unshared filesystem mounted on this vnode,
4085 4085 * Do not allow to remove anything in this directory.
4086 4086 */
4087 4087 if (vn_ismntpt(dvp)) {
4088 4088 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4089 4089 goto out;
4090 4090 }
4091 4091
4092 4092 if (dvp->v_type != VDIR) {
4093 4093 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4094 4094 goto out;
4095 4095 }
4096 4096
4097 4097 status = utf8_dir_verify(&args->target);
4098 4098 if (status != NFS4_OK) {
4099 4099 *cs->statusp = resp->status = status;
4100 4100 goto out;
4101 4101 }
4102 4102
4103 4103 /*
4104 4104 * Lookup the file so that we can check if it's a directory
4105 4105 */
4106 4106 nm = utf8_to_fn(&args->target, &len, NULL);
4107 4107 if (nm == NULL) {
4108 4108 *cs->statusp = resp->status = NFS4ERR_INVAL;
4109 4109 goto out;
4110 4110 }
4111 4111
4112 4112 if (len > MAXNAMELEN) {
4113 4113 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4114 4114 kmem_free(nm, len);
4115 4115 goto out;
4116 4116 }
4117 4117
4118 4118 if (rdonly4(req, cs)) {
4119 4119 *cs->statusp = resp->status = NFS4ERR_ROFS;
4120 4120 kmem_free(nm, len);
4121 4121 goto out;
4122 4122 }
4123 4123
4124 4124 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4125 4125 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4126 4126 MAXPATHLEN + 1);
4127 4127
4128 4128 if (name == NULL) {
4129 4129 *cs->statusp = resp->status = NFS4ERR_INVAL;
4130 4130 kmem_free(nm, len);
4131 4131 goto out;
4132 4132 }
4133 4133
4134 4134 /*
4135 4135 * Lookup the file to determine type and while we are see if
4136 4136 * there is a file struct around and check for delegation.
4137 4137 * We don't need to acquire va_seq before this lookup, if
4138 4138 * it causes an update, cinfo.before will not match, which will
4139 4139 * trigger a cache flush even if atomic is TRUE.
4140 4140 */
4141 4141 if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4142 4142 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4143 4143 NULL)) {
4144 4144 VN_RELE(vp);
4145 4145 rfs4_file_rele(fp);
4146 4146 *cs->statusp = resp->status = NFS4ERR_DELAY;
4147 4147 if (nm != name)
4148 4148 kmem_free(name, MAXPATHLEN + 1);
4149 4149 kmem_free(nm, len);
4150 4150 goto out;
4151 4151 }
4152 4152 }
4153 4153
4154 4154 /* Didn't find anything to remove */
4155 4155 if (vp == NULL) {
4156 4156 *cs->statusp = resp->status = error;
4157 4157 if (nm != name)
4158 4158 kmem_free(name, MAXPATHLEN + 1);
4159 4159 kmem_free(nm, len);
4160 4160 goto out;
4161 4161 }
4162 4162
4163 4163 if (nbl_need_check(vp)) {
4164 4164 nbl_start_crit(vp, RW_READER);
4165 4165 in_crit = 1;
4166 4166 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4167 4167 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4168 4168 if (nm != name)
4169 4169 kmem_free(name, MAXPATHLEN + 1);
4170 4170 kmem_free(nm, len);
4171 4171 nbl_end_crit(vp);
4172 4172 VN_RELE(vp);
4173 4173 if (fp) {
4174 4174 rfs4_clear_dont_grant(fp);
4175 4175 rfs4_file_rele(fp);
4176 4176 }
4177 4177 goto out;
4178 4178 }
4179 4179 }
4180 4180
4181 4181 /* check label before allowing removal */
4182 4182 if (is_system_labeled()) {
4183 4183 ASSERT(req->rq_label != NULL);
4184 4184 clabel = req->rq_label;
4185 4185 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4186 4186 "got client label from request(1)",
4187 4187 struct svc_req *, req);
4188 4188 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4189 4189 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4190 4190 cs->exi)) {
4191 4191 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4192 4192 if (name != nm)
4193 4193 kmem_free(name, MAXPATHLEN + 1);
4194 4194 kmem_free(nm, len);
4195 4195 if (in_crit)
4196 4196 nbl_end_crit(vp);
4197 4197 VN_RELE(vp);
4198 4198 if (fp) {
4199 4199 rfs4_clear_dont_grant(fp);
4200 4200 rfs4_file_rele(fp);
4201 4201 }
4202 4202 goto out;
4203 4203 }
4204 4204 }
4205 4205 }
4206 4206
4207 4207 /* Get dir "before" change value */
4208 4208 bdva.va_mask = AT_CTIME|AT_SEQ;
4209 4209 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4210 4210 if (error) {
4211 4211 *cs->statusp = resp->status = puterrno4(error);
4212 4212 if (nm != name)
4213 4213 kmem_free(name, MAXPATHLEN + 1);
4214 4214 kmem_free(nm, len);
4215 4215 if (in_crit)
4216 4216 nbl_end_crit(vp);
4217 4217 VN_RELE(vp);
4218 4218 if (fp) {
4219 4219 rfs4_clear_dont_grant(fp);
4220 4220 rfs4_file_rele(fp);
4221 4221 }
4222 4222 goto out;
4223 4223 }
4224 4224 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4225 4225
4226 4226 /* Actually do the REMOVE operation */
4227 4227 if (vp->v_type == VDIR) {
4228 4228 /*
4229 4229 * Can't remove a directory that has a mounted-on filesystem.
4230 4230 */
4231 4231 if (vn_ismntpt(vp)) {
4232 4232 error = EACCES;
4233 4233 } else {
4234 4234 /*
4235 4235 * System V defines rmdir to return EEXIST,
4236 4236 * not ENOTEMPTY, if the directory is not
4237 4237 * empty. A System V NFS server needs to map
4238 4238 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4239 4239 * transmit over the wire.
4240 4240 */
4241 4241 if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr,
4242 4242 NULL, 0)) == EEXIST)
4243 4243 error = ENOTEMPTY;
4244 4244 }
4245 4245 } else {
4246 4246 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4247 4247 fp != NULL) {
4248 4248 struct vattr va;
4249 4249 vnode_t *tvp;
4250 4250
4251 4251 rfs4_dbe_lock(fp->rf_dbe);
4252 4252 tvp = fp->rf_vp;
4253 4253 if (tvp)
4254 4254 VN_HOLD(tvp);
4255 4255 rfs4_dbe_unlock(fp->rf_dbe);
4256 4256
4257 4257 if (tvp) {
4258 4258 /*
4259 4259 * This is va_seq safe because we are not
4260 4260 * manipulating dvp.
4261 4261 */
4262 4262 va.va_mask = AT_NLINK;
4263 4263 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4264 4264 va.va_nlink == 0) {
4265 4265 /* Remove state on file remove */
4266 4266 if (in_crit) {
4267 4267 nbl_end_crit(vp);
4268 4268 in_crit = 0;
4269 4269 }
4270 4270 rfs4_close_all_state(fp);
4271 4271 }
4272 4272 VN_RELE(tvp);
4273 4273 }
4274 4274 }
4275 4275 }
4276 4276
4277 4277 if (in_crit)
4278 4278 nbl_end_crit(vp);
4279 4279 VN_RELE(vp);
4280 4280
4281 4281 if (fp) {
4282 4282 rfs4_clear_dont_grant(fp);
4283 4283 rfs4_file_rele(fp);
4284 4284 }
4285 4285 if (nm != name)
4286 4286 kmem_free(name, MAXPATHLEN + 1);
4287 4287 kmem_free(nm, len);
4288 4288
4289 4289 if (error) {
4290 4290 *cs->statusp = resp->status = puterrno4(error);
4291 4291 goto out;
4292 4292 }
4293 4293
4294 4294 /*
4295 4295 * Get the initial "after" sequence number, if it fails, set to zero
4296 4296 */
4297 4297 idva.va_mask = AT_SEQ;
4298 4298 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4299 4299 idva.va_seq = 0;
4300 4300
4301 4301 /*
4302 4302 * Force modified data and metadata out to stable storage.
4303 4303 */
4304 4304 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4305 4305
4306 4306 /*
4307 4307 * Get "after" change value, if it fails, simply return the
4308 4308 * before value.
4309 4309 */
4310 4310 adva.va_mask = AT_CTIME|AT_SEQ;
4311 4311 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4312 4312 adva.va_ctime = bdva.va_ctime;
4313 4313 adva.va_seq = 0;
4314 4314 }
4315 4315
4316 4316 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4317 4317
4318 4318 /*
4319 4319 * The cinfo.atomic = TRUE only if we have
4320 4320 * non-zero va_seq's, and it has incremented by exactly one
4321 4321 * during the VOP_REMOVE/RMDIR and it didn't change during
4322 4322 * the VOP_FSYNC.
4323 4323 */
4324 4324 if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4325 4325 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4326 4326 resp->cinfo.atomic = TRUE;
4327 4327 else
4328 4328 resp->cinfo.atomic = FALSE;
4329 4329
4330 4330 *cs->statusp = resp->status = NFS4_OK;
4331 4331
4332 4332 out:
4333 4333 DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4334 4334 REMOVE4res *, resp);
4335 4335 }
4336 4336
4337 4337 /*
4338 4338 * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4339 4339 * oldname and newname.
4340 4340 * res: status. If success - CURRENT_FH unchanged, return change_info
4341 4341 * for both from and target directories.
4342 4342 */
4343 4343 /* ARGSUSED */
4344 4344 static void
4345 4345 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4346 4346 struct compound_state *cs)
4347 4347 {
4348 4348 RENAME4args *args = &argop->nfs_argop4_u.oprename;
4349 4349 RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4350 4350 int error;
4351 4351 vnode_t *odvp;
4352 4352 vnode_t *ndvp;
4353 4353 vnode_t *srcvp, *targvp, *tvp;
4354 4354 struct vattr obdva, oidva, oadva;
4355 4355 struct vattr nbdva, nidva, nadva;
4356 4356 char *onm, *nnm;
4357 4357 uint_t olen, nlen;
4358 4358 rfs4_file_t *fp, *sfp;
4359 4359 int in_crit_src, in_crit_targ;
4360 4360 int fp_rele_grant_hold, sfp_rele_grant_hold;
4361 4361 int unlinked;
4362 4362 bslabel_t *clabel;
4363 4363 struct sockaddr *ca;
4364 4364 char *converted_onm = NULL;
4365 4365 char *converted_nnm = NULL;
4366 4366 nfsstat4 status;
4367 4367
4368 4368 DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4369 4369 RENAME4args *, args);
4370 4370
4371 4371 fp = sfp = NULL;
4372 4372 srcvp = targvp = tvp = NULL;
4373 4373 in_crit_src = in_crit_targ = 0;
4374 4374 fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4375 4375 unlinked = 0;
4376 4376
4377 4377 /* CURRENT_FH: target directory */
4378 4378 ndvp = cs->vp;
4379 4379 if (ndvp == NULL) {
4380 4380 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4381 4381 goto out;
4382 4382 }
4383 4383
4384 4384 /* SAVED_FH: from directory */
4385 4385 odvp = cs->saved_vp;
4386 4386 if (odvp == NULL) {
4387 4387 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4388 4388 goto out;
4389 4389 }
4390 4390
4391 4391 if (cs->access == CS_ACCESS_DENIED) {
4392 4392 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4393 4393 goto out;
4394 4394 }
4395 4395
4396 4396 /*
4397 4397 * If there is an unshared filesystem mounted on this vnode,
4398 4398 * do not allow to rename objects in this directory.
4399 4399 */
4400 4400 if (vn_ismntpt(odvp)) {
4401 4401 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4402 4402 goto out;
4403 4403 }
4404 4404
4405 4405 /*
4406 4406 * If there is an unshared filesystem mounted on this vnode,
4407 4407 * do not allow to rename to this directory.
4408 4408 */
4409 4409 if (vn_ismntpt(ndvp)) {
4410 4410 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4411 4411 goto out;
4412 4412 }
4413 4413
4414 4414 if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4415 4415 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4416 4416 goto out;
4417 4417 }
4418 4418
4419 4419 if (cs->saved_exi != cs->exi) {
4420 4420 *cs->statusp = resp->status = NFS4ERR_XDEV;
4421 4421 goto out;
4422 4422 }
4423 4423
4424 4424 status = utf8_dir_verify(&args->oldname);
4425 4425 if (status != NFS4_OK) {
4426 4426 *cs->statusp = resp->status = status;
4427 4427 goto out;
4428 4428 }
4429 4429
4430 4430 status = utf8_dir_verify(&args->newname);
4431 4431 if (status != NFS4_OK) {
4432 4432 *cs->statusp = resp->status = status;
4433 4433 goto out;
4434 4434 }
4435 4435
4436 4436 onm = utf8_to_fn(&args->oldname, &olen, NULL);
4437 4437 if (onm == NULL) {
4438 4438 *cs->statusp = resp->status = NFS4ERR_INVAL;
4439 4439 goto out;
4440 4440 }
4441 4441 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4442 4442 nlen = MAXPATHLEN + 1;
4443 4443 converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4444 4444 nlen);
4445 4445
4446 4446 if (converted_onm == NULL) {
4447 4447 *cs->statusp = resp->status = NFS4ERR_INVAL;
4448 4448 kmem_free(onm, olen);
4449 4449 goto out;
4450 4450 }
4451 4451
4452 4452 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4453 4453 if (nnm == NULL) {
4454 4454 *cs->statusp = resp->status = NFS4ERR_INVAL;
4455 4455 if (onm != converted_onm)
4456 4456 kmem_free(converted_onm, MAXPATHLEN + 1);
4457 4457 kmem_free(onm, olen);
4458 4458 goto out;
4459 4459 }
4460 4460 converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4461 4461 MAXPATHLEN + 1);
4462 4462
4463 4463 if (converted_nnm == NULL) {
4464 4464 *cs->statusp = resp->status = NFS4ERR_INVAL;
4465 4465 kmem_free(nnm, nlen);
4466 4466 nnm = NULL;
4467 4467 if (onm != converted_onm)
4468 4468 kmem_free(converted_onm, MAXPATHLEN + 1);
4469 4469 kmem_free(onm, olen);
4470 4470 goto out;
4471 4471 }
4472 4472
4473 4473
4474 4474 if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4475 4475 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4476 4476 kmem_free(onm, olen);
4477 4477 kmem_free(nnm, nlen);
4478 4478 goto out;
4479 4479 }
4480 4480
4481 4481
4482 4482 if (rdonly4(req, cs)) {
4483 4483 *cs->statusp = resp->status = NFS4ERR_ROFS;
4484 4484 if (onm != converted_onm)
4485 4485 kmem_free(converted_onm, MAXPATHLEN + 1);
4486 4486 kmem_free(onm, olen);
4487 4487 if (nnm != converted_nnm)
4488 4488 kmem_free(converted_nnm, MAXPATHLEN + 1);
4489 4489 kmem_free(nnm, nlen);
4490 4490 goto out;
4491 4491 }
4492 4492
4493 4493 /* check label of the target dir */
4494 4494 if (is_system_labeled()) {
4495 4495 ASSERT(req->rq_label != NULL);
4496 4496 clabel = req->rq_label;
4497 4497 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4498 4498 "got client label from request(1)",
4499 4499 struct svc_req *, req);
4500 4500 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4501 4501 if (!do_rfs_label_check(clabel, ndvp,
4502 4502 EQUALITY_CHECK, cs->exi)) {
4503 4503 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4504 4504 goto err_out;
4505 4505 }
4506 4506 }
4507 4507 }
4508 4508
4509 4509 /*
4510 4510 * Is the source a file and have a delegation?
4511 4511 * We don't need to acquire va_seq before these lookups, if
4512 4512 * it causes an update, cinfo.before will not match, which will
4513 4513 * trigger a cache flush even if atomic is TRUE.
4514 4514 */
4515 4515 if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4516 4516 &error, cs->cr)) {
4517 4517 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4518 4518 NULL)) {
4519 4519 *cs->statusp = resp->status = NFS4ERR_DELAY;
4520 4520 goto err_out;
4521 4521 }
4522 4522 }
4523 4523
4524 4524 if (srcvp == NULL) {
4525 4525 *cs->statusp = resp->status = puterrno4(error);
4526 4526 if (onm != converted_onm)
4527 4527 kmem_free(converted_onm, MAXPATHLEN + 1);
4528 4528 kmem_free(onm, olen);
4529 4529 if (nnm != converted_nnm)
4530 4530 kmem_free(converted_nnm, MAXPATHLEN + 1);
4531 4531 kmem_free(nnm, nlen);
4532 4532 goto out;
4533 4533 }
4534 4534
4535 4535 sfp_rele_grant_hold = 1;
4536 4536
4537 4537 /* Does the destination exist and a file and have a delegation? */
4538 4538 if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4539 4539 NULL, cs->cr)) {
4540 4540 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4541 4541 NULL)) {
4542 4542 *cs->statusp = resp->status = NFS4ERR_DELAY;
4543 4543 goto err_out;
4544 4544 }
4545 4545 }
4546 4546 fp_rele_grant_hold = 1;
4547 4547
4548 4548 /* Check for NBMAND lock on both source and target */
4549 4549 if (nbl_need_check(srcvp)) {
4550 4550 nbl_start_crit(srcvp, RW_READER);
4551 4551 in_crit_src = 1;
4552 4552 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4553 4553 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4554 4554 goto err_out;
4555 4555 }
4556 4556 }
4557 4557
4558 4558 if (targvp && nbl_need_check(targvp)) {
4559 4559 nbl_start_crit(targvp, RW_READER);
4560 4560 in_crit_targ = 1;
4561 4561 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4562 4562 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4563 4563 goto err_out;
4564 4564 }
4565 4565 }
4566 4566
4567 4567 /* Get source "before" change value */
4568 4568 obdva.va_mask = AT_CTIME|AT_SEQ;
4569 4569 error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4570 4570 if (!error) {
4571 4571 nbdva.va_mask = AT_CTIME|AT_SEQ;
4572 4572 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4573 4573 }
4574 4574 if (error) {
4575 4575 *cs->statusp = resp->status = puterrno4(error);
4576 4576 goto err_out;
4577 4577 }
4578 4578
4579 4579 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4580 4580 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4581 4581
4582 4582 error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr,
4583 4583 NULL, 0);
4584 4584
4585 4585 /*
4586 4586 * If target existed and was unlinked by VOP_RENAME, state will need
4587 4587 * closed. To avoid deadlock, rfs4_close_all_state will be done after
4588 4588 * any necessary nbl_end_crit on srcvp and tgtvp.
4589 4589 */
4590 4590 if (error == 0 && fp != NULL) {
4591 4591 rfs4_dbe_lock(fp->rf_dbe);
4592 4592 tvp = fp->rf_vp;
4593 4593 if (tvp)
4594 4594 VN_HOLD(tvp);
4595 4595 rfs4_dbe_unlock(fp->rf_dbe);
4596 4596
4597 4597 if (tvp) {
4598 4598 struct vattr va;
4599 4599 va.va_mask = AT_NLINK;
4600 4600
4601 4601 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4602 4602 va.va_nlink == 0) {
4603 4603 unlinked = 1;
4604 4604
4605 4605 /* DEBUG data */
4606 4606 if ((srcvp == targvp) || (tvp != targvp)) {
4607 4607 cmn_err(CE_WARN, "rfs4_op_rename: "
4608 4608 "srcvp %p, targvp: %p, tvp: %p",
4609 4609 (void *)srcvp, (void *)targvp,
4610 4610 (void *)tvp);
4611 4611 }
4612 4612 } else {
4613 4613 VN_RELE(tvp);
4614 4614 }
4615 4615 }
4616 4616 }
4617 4617 if (error == 0)
4618 4618 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4619 4619
4620 4620 if (in_crit_src)
4621 4621 nbl_end_crit(srcvp);
4622 4622 if (srcvp)
4623 4623 VN_RELE(srcvp);
4624 4624 if (in_crit_targ)
4625 4625 nbl_end_crit(targvp);
4626 4626 if (targvp)
4627 4627 VN_RELE(targvp);
4628 4628
4629 4629 if (unlinked) {
4630 4630 ASSERT(fp != NULL);
4631 4631 ASSERT(tvp != NULL);
4632 4632
4633 4633 /* DEBUG data */
4634 4634 if (RW_READ_HELD(&tvp->v_nbllock)) {
4635 4635 cmn_err(CE_WARN, "rfs4_op_rename: "
4636 4636 "RW_READ_HELD(%p)", (void *)tvp);
4637 4637 }
4638 4638
4639 4639 /* The file is gone and so should the state */
4640 4640 rfs4_close_all_state(fp);
4641 4641 VN_RELE(tvp);
4642 4642 }
4643 4643
4644 4644 if (sfp) {
4645 4645 rfs4_clear_dont_grant(sfp);
4646 4646 rfs4_file_rele(sfp);
4647 4647 }
4648 4648 if (fp) {
4649 4649 rfs4_clear_dont_grant(fp);
4650 4650 rfs4_file_rele(fp);
4651 4651 }
4652 4652
4653 4653 if (converted_onm != onm)
4654 4654 kmem_free(converted_onm, MAXPATHLEN + 1);
4655 4655 kmem_free(onm, olen);
4656 4656 if (converted_nnm != nnm)
4657 4657 kmem_free(converted_nnm, MAXPATHLEN + 1);
4658 4658 kmem_free(nnm, nlen);
4659 4659
4660 4660 /*
4661 4661 * Get the initial "after" sequence number, if it fails, set to zero
4662 4662 */
4663 4663 oidva.va_mask = AT_SEQ;
4664 4664 if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4665 4665 oidva.va_seq = 0;
4666 4666
4667 4667 nidva.va_mask = AT_SEQ;
4668 4668 if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4669 4669 nidva.va_seq = 0;
4670 4670
4671 4671 /*
4672 4672 * Force modified data and metadata out to stable storage.
4673 4673 */
4674 4674 (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4675 4675 (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4676 4676
4677 4677 if (error) {
4678 4678 *cs->statusp = resp->status = puterrno4(error);
4679 4679 goto out;
4680 4680 }
4681 4681
4682 4682 /*
4683 4683 * Get "after" change values, if it fails, simply return the
4684 4684 * before value.
4685 4685 */
4686 4686 oadva.va_mask = AT_CTIME|AT_SEQ;
4687 4687 if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4688 4688 oadva.va_ctime = obdva.va_ctime;
4689 4689 oadva.va_seq = 0;
4690 4690 }
4691 4691
4692 4692 nadva.va_mask = AT_CTIME|AT_SEQ;
4693 4693 if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4694 4694 nadva.va_ctime = nbdva.va_ctime;
4695 4695 nadva.va_seq = 0;
4696 4696 }
4697 4697
4698 4698 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4699 4699 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4700 4700
4701 4701 /*
4702 4702 * The cinfo.atomic = TRUE only if we have
4703 4703 * non-zero va_seq's, and it has incremented by exactly one
4704 4704 * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4705 4705 */
4706 4706 if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4707 4707 oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4708 4708 resp->source_cinfo.atomic = TRUE;
4709 4709 else
4710 4710 resp->source_cinfo.atomic = FALSE;
4711 4711
4712 4712 if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4713 4713 nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4714 4714 resp->target_cinfo.atomic = TRUE;
4715 4715 else
4716 4716 resp->target_cinfo.atomic = FALSE;
4717 4717
4718 4718 #ifdef VOLATILE_FH_TEST
4719 4719 {
4720 4720 extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4721 4721
4722 4722 /*
4723 4723 * Add the renamed file handle to the volatile rename list
4724 4724 */
4725 4725 if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4726 4726 /* file handles may expire on rename */
4727 4727 vnode_t *vp;
4728 4728
4729 4729 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4730 4730 /*
4731 4731 * Already know that nnm will be a valid string
4732 4732 */
4733 4733 error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4734 4734 NULL, NULL, NULL);
4735 4735 kmem_free(nnm, nlen);
4736 4736 if (!error) {
4737 4737 add_volrnm_fh(cs->exi, vp);
4738 4738 VN_RELE(vp);
4739 4739 }
4740 4740 }
4741 4741 }
4742 4742 #endif /* VOLATILE_FH_TEST */
4743 4743
4744 4744 *cs->statusp = resp->status = NFS4_OK;
4745 4745 out:
4746 4746 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4747 4747 RENAME4res *, resp);
4748 4748 return;
4749 4749
4750 4750 err_out:
4751 4751 if (onm != converted_onm)
4752 4752 kmem_free(converted_onm, MAXPATHLEN + 1);
4753 4753 if (onm != NULL)
4754 4754 kmem_free(onm, olen);
4755 4755 if (nnm != converted_nnm)
4756 4756 kmem_free(converted_nnm, MAXPATHLEN + 1);
4757 4757 if (nnm != NULL)
4758 4758 kmem_free(nnm, nlen);
4759 4759
4760 4760 if (in_crit_src) nbl_end_crit(srcvp);
4761 4761 if (in_crit_targ) nbl_end_crit(targvp);
4762 4762 if (targvp) VN_RELE(targvp);
4763 4763 if (srcvp) VN_RELE(srcvp);
4764 4764 if (sfp) {
4765 4765 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4766 4766 rfs4_file_rele(sfp);
4767 4767 }
4768 4768 if (fp) {
4769 4769 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4770 4770 rfs4_file_rele(fp);
4771 4771 }
4772 4772
4773 4773 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4774 4774 RENAME4res *, resp);
4775 4775 }
4776 4776
4777 4777 /* ARGSUSED */
4778 4778 static void
4779 4779 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4780 4780 struct compound_state *cs)
4781 4781 {
4782 4782 RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4783 4783 RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4784 4784 rfs4_client_t *cp;
4785 4785
4786 4786 DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4787 4787 RENEW4args *, args);
4788 4788
4789 4789 if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4790 4790 *cs->statusp = resp->status =
4791 4791 rfs4_check_clientid(&args->clientid, 0);
4792 4792 goto out;
4793 4793 }
4794 4794
4795 4795 if (rfs4_lease_expired(cp)) {
4796 4796 rfs4_client_rele(cp);
4797 4797 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4798 4798 goto out;
4799 4799 }
4800 4800
4801 4801 rfs4_update_lease(cp);
4802 4802
4803 4803 mutex_enter(cp->rc_cbinfo.cb_lock);
4804 4804 if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4805 4805 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4806 4806 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4807 4807 } else {
4808 4808 *cs->statusp = resp->status = NFS4_OK;
4809 4809 }
4810 4810 mutex_exit(cp->rc_cbinfo.cb_lock);
4811 4811
4812 4812 rfs4_client_rele(cp);
4813 4813
4814 4814 out:
4815 4815 DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4816 4816 RENEW4res *, resp);
4817 4817 }
4818 4818
4819 4819 /* ARGSUSED */
4820 4820 static void
4821 4821 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4822 4822 struct compound_state *cs)
4823 4823 {
4824 4824 RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4825 4825
4826 4826 DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4827 4827
4828 4828 /* No need to check cs->access - we are not accessing any object */
4829 4829 if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4830 4830 *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4831 4831 goto out;
4832 4832 }
4833 4833 if (cs->vp != NULL) {
4834 4834 VN_RELE(cs->vp);
4835 4835 }
4836 4836 cs->vp = cs->saved_vp;
4837 4837 cs->saved_vp = NULL;
4838 4838 cs->exi = cs->saved_exi;
4839 4839 nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4840 4840 *cs->statusp = resp->status = NFS4_OK;
4841 4841 cs->deleg = FALSE;
4842 4842
4843 4843 out:
4844 4844 DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4845 4845 RESTOREFH4res *, resp);
4846 4846 }
4847 4847
4848 4848 /* ARGSUSED */
4849 4849 static void
4850 4850 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4851 4851 struct compound_state *cs)
4852 4852 {
4853 4853 SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4854 4854
4855 4855 DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4856 4856
4857 4857 /* No need to check cs->access - we are not accessing any object */
4858 4858 if (cs->vp == NULL) {
4859 4859 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4860 4860 goto out;
4861 4861 }
4862 4862 if (cs->saved_vp != NULL) {
4863 4863 VN_RELE(cs->saved_vp);
4864 4864 }
4865 4865 cs->saved_vp = cs->vp;
4866 4866 VN_HOLD(cs->saved_vp);
4867 4867 cs->saved_exi = cs->exi;
4868 4868 /*
4869 4869 * since SAVEFH is fairly rare, don't alloc space for its fh
4870 4870 * unless necessary.
4871 4871 */
4872 4872 if (cs->saved_fh.nfs_fh4_val == NULL) {
4873 4873 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4874 4874 }
4875 4875 nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4876 4876 *cs->statusp = resp->status = NFS4_OK;
4877 4877
4878 4878 out:
4879 4879 DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4880 4880 SAVEFH4res *, resp);
4881 4881 }
4882 4882
4883 4883 /*
4884 4884 * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4885 4885 * return the bitmap of attrs that were set successfully. It is also
4886 4886 * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4887 4887 * always be called only after rfs4_do_set_attrs().
4888 4888 *
4889 4889 * Verify that the attributes are same as the expected ones. sargp->vap
4890 4890 * and sargp->sbp contain the input attributes as translated from fattr4.
4891 4891 *
4892 4892 * This function verifies only the attrs that correspond to a vattr or
4893 4893 * vfsstat struct. That is because of the extra step needed to get the
4894 4894 * corresponding system structs. Other attributes have already been set or
4895 4895 * verified by do_rfs4_set_attrs.
4896 4896 *
4897 4897 * Return 0 if all attrs match, -1 if some don't, error if error processing.
4898 4898 */
4899 4899 static int
4900 4900 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4901 4901 bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4902 4902 {
4903 4903 int error, ret_error = 0;
4904 4904 int i, k;
4905 4905 uint_t sva_mask = sargp->vap->va_mask;
4906 4906 uint_t vbit;
4907 4907 union nfs4_attr_u *na;
4908 4908 uint8_t *amap;
4909 4909 bool_t getsb = ntovp->vfsstat;
4910 4910
4911 4911 if (sva_mask != 0) {
4912 4912 /*
4913 4913 * Okay to overwrite sargp->vap because we verify based
4914 4914 * on the incoming values.
4915 4915 */
4916 4916 ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4917 4917 sargp->cs->cr, NULL);
4918 4918 if (ret_error) {
4919 4919 if (resp == NULL)
4920 4920 return (ret_error);
4921 4921 /*
4922 4922 * Must return bitmap of successful attrs
4923 4923 */
4924 4924 sva_mask = 0; /* to prevent checking vap later */
4925 4925 } else {
4926 4926 /*
4927 4927 * Some file systems clobber va_mask. it is probably
4928 4928 * wrong of them to do so, nonethless we practice
4929 4929 * defensive coding.
4930 4930 * See bug id 4276830.
4931 4931 */
4932 4932 sargp->vap->va_mask = sva_mask;
4933 4933 }
4934 4934 }
4935 4935
4936 4936 if (getsb) {
4937 4937 /*
4938 4938 * Now get the superblock and loop on the bitmap, as there is
4939 4939 * no simple way of translating from superblock to bitmap4.
4940 4940 */
4941 4941 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4942 4942 if (ret_error) {
4943 4943 if (resp == NULL)
4944 4944 goto errout;
4945 4945 getsb = FALSE;
4946 4946 }
4947 4947 }
4948 4948
4949 4949 /*
4950 4950 * Now loop and verify each attribute which getattr returned
4951 4951 * whether it's the same as the input.
4952 4952 */
4953 4953 if (resp == NULL && !getsb && (sva_mask == 0))
4954 4954 goto errout;
4955 4955
4956 4956 na = ntovp->na;
4957 4957 amap = ntovp->amap;
4958 4958 k = 0;
4959 4959 for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4960 4960 k = *amap;
4961 4961 ASSERT(nfs4_ntov_map[k].nval == k);
4962 4962 vbit = nfs4_ntov_map[k].vbit;
4963 4963
4964 4964 /*
4965 4965 * If vattr attribute but VOP_GETATTR failed, or it's
4966 4966 * superblock attribute but VFS_STATVFS failed, skip
4967 4967 */
4968 4968 if (vbit) {
4969 4969 if ((vbit & sva_mask) == 0)
4970 4970 continue;
4971 4971 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4972 4972 continue;
4973 4973 }
4974 4974 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
4975 4975 if (resp != NULL) {
4976 4976 if (error)
4977 4977 ret_error = -1; /* not all match */
4978 4978 else /* update response bitmap */
4979 4979 *resp |= nfs4_ntov_map[k].fbit;
4980 4980 continue;
4981 4981 }
4982 4982 if (error) {
4983 4983 ret_error = -1; /* not all match */
4984 4984 break;
4985 4985 }
4986 4986 }
4987 4987 errout:
4988 4988 return (ret_error);
4989 4989 }
4990 4990
4991 4991 /*
4992 4992 * Decode the attribute to be set/verified. If the attr requires a sys op
4993 4993 * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
4994 4994 * call the sv_getit function for it, because the sys op hasn't yet been done.
4995 4995 * Return 0 for success, error code if failed.
4996 4996 *
4997 4997 * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
4998 4998 */
4999 4999 static int
5000 5000 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
5001 5001 int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
5002 5002 {
5003 5003 int error = 0;
5004 5004 bool_t set_later;
5005 5005
5006 5006 sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
5007 5007
5008 5008 if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
5009 5009 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
5010 5010 /*
5011 5011 * don't verify yet if a vattr or sb dependent attr,
5012 5012 * because we don't have their sys values yet.
5013 5013 * Will be done later.
5014 5014 */
5015 5015 if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
5016 5016 /*
5017 5017 * ACLs are a special case, since setting the MODE
5018 5018 * conflicts with setting the ACL. We delay setting
5019 5019 * the ACL until all other attributes have been set.
5020 5020 * The ACL gets set in do_rfs4_op_setattr().
5021 5021 */
5022 5022 if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5023 5023 error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5024 5024 sargp, nap);
5025 5025 if (error) {
5026 5026 xdr_free(nfs4_ntov_map[k].xfunc,
5027 5027 (caddr_t)nap);
5028 5028 }
5029 5029 }
5030 5030 }
5031 5031 } else {
5032 5032 #ifdef DEBUG
5033 5033 cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5034 5034 "decoding attribute %d\n", k);
5035 5035 #endif
5036 5036 error = EINVAL;
5037 5037 }
5038 5038 if (!error && resp_bval && !set_later) {
5039 5039 *resp_bval |= nfs4_ntov_map[k].fbit;
5040 5040 }
5041 5041
5042 5042 return (error);
5043 5043 }
5044 5044
5045 5045 /*
5046 5046 * Set vattr based on incoming fattr4 attrs - used by setattr.
5047 5047 * Set response mask. Ignore any values that are not writable vattr attrs.
5048 5048 */
5049 5049 static nfsstat4
5050 5050 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5051 5051 struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5052 5052 nfs4_attr_cmd_t cmd)
5053 5053 {
5054 5054 int error = 0;
5055 5055 int i;
5056 5056 char *attrs = fattrp->attrlist4;
5057 5057 uint32_t attrslen = fattrp->attrlist4_len;
5058 5058 XDR xdr;
5059 5059 nfsstat4 status = NFS4_OK;
5060 5060 vnode_t *vp = cs->vp;
5061 5061 union nfs4_attr_u *na;
5062 5062 uint8_t *amap;
5063 5063
5064 5064 #ifndef lint
5065 5065 /*
5066 5066 * Make sure that maximum attribute number can be expressed as an
5067 5067 * 8 bit quantity.
5068 5068 */
5069 5069 ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5070 5070 #endif
5071 5071
5072 5072 if (vp == NULL) {
5073 5073 if (resp)
5074 5074 *resp = 0;
5075 5075 return (NFS4ERR_NOFILEHANDLE);
5076 5076 }
5077 5077 if (cs->access == CS_ACCESS_DENIED) {
5078 5078 if (resp)
5079 5079 *resp = 0;
5080 5080 return (NFS4ERR_ACCESS);
5081 5081 }
5082 5082
5083 5083 sargp->op = cmd;
5084 5084 sargp->cs = cs;
5085 5085 sargp->flag = 0; /* may be set later */
5086 5086 sargp->vap->va_mask = 0;
5087 5087 sargp->rdattr_error = NFS4_OK;
5088 5088 sargp->rdattr_error_req = FALSE;
5089 5089 /* sargp->sbp is set by the caller */
5090 5090
5091 5091 xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5092 5092
5093 5093 na = ntovp->na;
5094 5094 amap = ntovp->amap;
5095 5095
5096 5096 /*
5097 5097 * The following loop iterates on the nfs4_ntov_map checking
5098 5098 * if the fbit is set in the requested bitmap.
5099 5099 * If set then we process the arguments using the
5100 5100 * rfs4_fattr4 conversion functions to populate the setattr
5101 5101 * vattr and va_mask. Any settable attrs that are not using vattr
5102 5102 * will be set in this loop.
5103 5103 */
5104 5104 for (i = 0; i < nfs4_ntov_map_size; i++) {
5105 5105 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5106 5106 continue;
5107 5107 }
5108 5108 /*
5109 5109 * If setattr, must be a writable attr.
5110 5110 * If verify/nverify, must be a readable attr.
5111 5111 */
5112 5112 if ((error = (*nfs4_ntov_map[i].sv_getit)(
5113 5113 NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5114 5114 /*
5115 5115 * Client tries to set/verify an
5116 5116 * unsupported attribute, tries to set
5117 5117 * a read only attr or verify a write
5118 5118 * only one - error!
5119 5119 */
5120 5120 break;
5121 5121 }
5122 5122 /*
5123 5123 * Decode the attribute to set/verify
5124 5124 */
5125 5125 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5126 5126 &xdr, resp ? resp : NULL, na);
5127 5127 if (error)
5128 5128 break;
5129 5129 *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5130 5130 na++;
5131 5131 (ntovp->attrcnt)++;
5132 5132 if (nfs4_ntov_map[i].vfsstat)
5133 5133 ntovp->vfsstat = TRUE;
5134 5134 }
5135 5135
5136 5136 if (error != 0)
5137 5137 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5138 5138 puterrno4(error));
5139 5139 /* xdrmem_destroy(&xdrs); */ /* NO-OP */
5140 5140 return (status);
5141 5141 }
5142 5142
5143 5143 static nfsstat4
5144 5144 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5145 5145 stateid4 *stateid)
5146 5146 {
5147 5147 int error = 0;
5148 5148 struct nfs4_svgetit_arg sarg;
5149 5149 bool_t trunc;
5150 5150
5151 5151 nfsstat4 status = NFS4_OK;
5152 5152 cred_t *cr = cs->cr;
5153 5153 vnode_t *vp = cs->vp;
5154 5154 struct nfs4_ntov_table ntov;
5155 5155 struct statvfs64 sb;
5156 5156 struct vattr bva;
5157 5157 struct flock64 bf;
5158 5158 int in_crit = 0;
5159 5159 uint_t saved_mask = 0;
5160 5160 caller_context_t ct;
5161 5161
5162 5162 *resp = 0;
5163 5163 sarg.sbp = &sb;
5164 5164 sarg.is_referral = B_FALSE;
5165 5165 nfs4_ntov_table_init(&ntov);
5166 5166 status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5167 5167 NFS4ATTR_SETIT);
5168 5168 if (status != NFS4_OK) {
5169 5169 /*
5170 5170 * failed set attrs
5171 5171 */
5172 5172 goto done;
5173 5173 }
5174 5174 if ((sarg.vap->va_mask == 0) &&
5175 5175 (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5176 5176 /*
5177 5177 * no further work to be done
5178 5178 */
5179 5179 goto done;
5180 5180 }
5181 5181
5182 5182 /*
5183 5183 * If we got a request to set the ACL and the MODE, only
5184 5184 * allow changing VSUID, VSGID, and VSVTX. Attempting
5185 5185 * to change any other bits, along with setting an ACL,
5186 5186 * gives NFS4ERR_INVAL.
5187 5187 */
5188 5188 if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5189 5189 (fattrp->attrmask & FATTR4_MODE_MASK)) {
5190 5190 vattr_t va;
5191 5191
5192 5192 va.va_mask = AT_MODE;
5193 5193 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5194 5194 if (error) {
5195 5195 status = puterrno4(error);
5196 5196 goto done;
5197 5197 }
5198 5198 if ((sarg.vap->va_mode ^ va.va_mode) &
5199 5199 ~(VSUID | VSGID | VSVTX)) {
5200 5200 status = NFS4ERR_INVAL;
5201 5201 goto done;
5202 5202 }
5203 5203 }
5204 5204
5205 5205 /* Check stateid only if size has been set */
5206 5206 if (sarg.vap->va_mask & AT_SIZE) {
5207 5207 trunc = (sarg.vap->va_size == 0);
5208 5208 status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5209 5209 trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
5210 5210 if (status != NFS4_OK)
5211 5211 goto done;
5212 5212 } else {
5213 5213 ct.cc_sysid = 0;
5214 5214 ct.cc_pid = 0;
5215 5215 ct.cc_caller_id = nfs4_srv_caller_id;
5216 5216 ct.cc_flags = CC_DONTBLOCK;
5217 5217 }
5218 5218
5219 5219 /* XXX start of possible race with delegations */
5220 5220
5221 5221 /*
5222 5222 * We need to specially handle size changes because it is
5223 5223 * possible for the client to create a file with read-only
5224 5224 * modes, but with the file opened for writing. If the client
5225 5225 * then tries to set the file size, e.g. ftruncate(3C),
5226 5226 * fcntl(F_FREESP), the normal access checking done in
5227 5227 * VOP_SETATTR would prevent the client from doing it even though
5228 5228 * it should be allowed to do so. To get around this, we do the
5229 5229 * access checking for ourselves and use VOP_SPACE which doesn't
5230 5230 * do the access checking.
5231 5231 * Also the client should not be allowed to change the file
5232 5232 * size if there is a conflicting non-blocking mandatory lock in
5233 5233 * the region of the change.
5234 5234 */
5235 5235 if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5236 5236 u_offset_t offset;
5237 5237 ssize_t length;
5238 5238
5239 5239 /*
5240 5240 * ufs_setattr clears AT_SIZE from vap->va_mask, but
5241 5241 * before returning, sarg.vap->va_mask is used to
5242 5242 * generate the setattr reply bitmap. We also clear
5243 5243 * AT_SIZE below before calling VOP_SPACE. For both
5244 5244 * of these cases, the va_mask needs to be saved here
5245 5245 * and restored after calling VOP_SETATTR.
5246 5246 */
5247 5247 saved_mask = sarg.vap->va_mask;
5248 5248
5249 5249 /*
5250 5250 * Check any possible conflict due to NBMAND locks.
5251 5251 * Get into critical region before VOP_GETATTR, so the
5252 5252 * size attribute is valid when checking conflicts.
5253 5253 */
5254 5254 if (nbl_need_check(vp)) {
5255 5255 nbl_start_crit(vp, RW_READER);
5256 5256 in_crit = 1;
5257 5257 }
5258 5258
5259 5259 bva.va_mask = AT_UID|AT_SIZE;
5260 5260 if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
5261 5261 status = puterrno4(error);
5262 5262 goto done;
5263 5263 }
5264 5264
5265 5265 if (in_crit) {
5266 5266 if (sarg.vap->va_size < bva.va_size) {
5267 5267 offset = sarg.vap->va_size;
5268 5268 length = bva.va_size - sarg.vap->va_size;
5269 5269 } else {
5270 5270 offset = bva.va_size;
5271 5271 length = sarg.vap->va_size - bva.va_size;
5272 5272 }
5273 5273 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5274 5274 &ct)) {
5275 5275 status = NFS4ERR_LOCKED;
5276 5276 goto done;
5277 5277 }
5278 5278 }
5279 5279
5280 5280 if (crgetuid(cr) == bva.va_uid) {
5281 5281 sarg.vap->va_mask &= ~AT_SIZE;
5282 5282 bf.l_type = F_WRLCK;
5283 5283 bf.l_whence = 0;
5284 5284 bf.l_start = (off64_t)sarg.vap->va_size;
5285 5285 bf.l_len = 0;
5286 5286 bf.l_sysid = 0;
5287 5287 bf.l_pid = 0;
5288 5288 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5289 5289 (offset_t)sarg.vap->va_size, cr, &ct);
5290 5290 }
5291 5291 }
5292 5292
5293 5293 if (!error && sarg.vap->va_mask != 0)
5294 5294 error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5295 5295
5296 5296 /* restore va_mask -- ufs_setattr clears AT_SIZE */
5297 5297 if (saved_mask & AT_SIZE)
5298 5298 sarg.vap->va_mask |= AT_SIZE;
5299 5299
5300 5300 /*
5301 5301 * If an ACL was being set, it has been delayed until now,
5302 5302 * in order to set the mode (via the VOP_SETATTR() above) first.
5303 5303 */
5304 5304 if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5305 5305 int i;
5306 5306
5307 5307 for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5308 5308 if (ntov.amap[i] == FATTR4_ACL)
5309 5309 break;
5310 5310 if (i < NFS4_MAXNUM_ATTRS) {
5311 5311 error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5312 5312 NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5313 5313 if (error == 0) {
5314 5314 *resp |= FATTR4_ACL_MASK;
5315 5315 } else if (error == ENOTSUP) {
5316 5316 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5317 5317 status = NFS4ERR_ATTRNOTSUPP;
5318 5318 goto done;
5319 5319 }
5320 5320 } else {
5321 5321 NFS4_DEBUG(rfs4_debug,
5322 5322 (CE_NOTE, "do_rfs4_op_setattr: "
5323 5323 "unable to find ACL in fattr4"));
5324 5324 error = EINVAL;
5325 5325 }
5326 5326 }
5327 5327
5328 5328 if (error) {
5329 5329 /* check if a monitor detected a delegation conflict */
5330 5330 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5331 5331 status = NFS4ERR_DELAY;
5332 5332 else
5333 5333 status = puterrno4(error);
5334 5334
5335 5335 /*
5336 5336 * Set the response bitmap when setattr failed.
5337 5337 * If VOP_SETATTR partially succeeded, test by doing a
5338 5338 * VOP_GETATTR on the object and comparing the data
5339 5339 * to the setattr arguments.
5340 5340 */
5341 5341 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5342 5342 } else {
5343 5343 /*
5344 5344 * Force modified metadata out to stable storage.
5345 5345 */
5346 5346 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5347 5347 /*
5348 5348 * Set response bitmap
5349 5349 */
5350 5350 nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5351 5351 }
5352 5352
5353 5353 /* Return early and already have a NFSv4 error */
5354 5354 done:
5355 5355 /*
5356 5356 * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5357 5357 * conversion sets both readable and writeable NFS4 attrs
5358 5358 * for AT_MTIME and AT_ATIME. The line below masks out
5359 5359 * unrequested attrs from the setattr result bitmap. This
5360 5360 * is placed after the done: label to catch the ATTRNOTSUP
5361 5361 * case.
5362 5362 */
5363 5363 *resp &= fattrp->attrmask;
5364 5364
5365 5365 if (in_crit)
5366 5366 nbl_end_crit(vp);
5367 5367
5368 5368 nfs4_ntov_table_free(&ntov, &sarg);
5369 5369
5370 5370 return (status);
5371 5371 }
5372 5372
5373 5373 /* ARGSUSED */
5374 5374 static void
5375 5375 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5376 5376 struct compound_state *cs)
5377 5377 {
5378 5378 SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5379 5379 SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5380 5380 bslabel_t *clabel;
5381 5381
5382 5382 DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5383 5383 SETATTR4args *, args);
5384 5384
5385 5385 if (cs->vp == NULL) {
5386 5386 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5387 5387 goto out;
5388 5388 }
5389 5389
5390 5390 /*
5391 5391 * If there is an unshared filesystem mounted on this vnode,
5392 5392 * do not allow to setattr on this vnode.
5393 5393 */
5394 5394 if (vn_ismntpt(cs->vp)) {
5395 5395 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5396 5396 goto out;
5397 5397 }
5398 5398
5399 5399 resp->attrsset = 0;
5400 5400
5401 5401 if (rdonly4(req, cs)) {
5402 5402 *cs->statusp = resp->status = NFS4ERR_ROFS;
5403 5403 goto out;
5404 5404 }
5405 5405
5406 5406 /* check label before setting attributes */
5407 5407 if (is_system_labeled()) {
5408 5408 ASSERT(req->rq_label != NULL);
5409 5409 clabel = req->rq_label;
5410 5410 DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5411 5411 "got client label from request(1)",
5412 5412 struct svc_req *, req);
5413 5413 if (!blequal(&l_admin_low->tsl_label, clabel)) {
5414 5414 if (!do_rfs_label_check(clabel, cs->vp,
5415 5415 EQUALITY_CHECK, cs->exi)) {
5416 5416 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5417 5417 goto out;
5418 5418 }
5419 5419 }
5420 5420 }
5421 5421
5422 5422 *cs->statusp = resp->status =
5423 5423 do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5424 5424 &args->stateid);
5425 5425
5426 5426 out:
5427 5427 DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5428 5428 SETATTR4res *, resp);
5429 5429 }
5430 5430
5431 5431 /* ARGSUSED */
5432 5432 static void
5433 5433 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5434 5434 struct compound_state *cs)
5435 5435 {
5436 5436 /*
5437 5437 * verify and nverify are exactly the same, except that nverify
5438 5438 * succeeds when some argument changed, and verify succeeds when
5439 5439 * when none changed.
5440 5440 */
5441 5441
5442 5442 VERIFY4args *args = &argop->nfs_argop4_u.opverify;
5443 5443 VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5444 5444
5445 5445 int error;
5446 5446 struct nfs4_svgetit_arg sarg;
5447 5447 struct statvfs64 sb;
5448 5448 struct nfs4_ntov_table ntov;
5449 5449
5450 5450 DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5451 5451 VERIFY4args *, args);
5452 5452
5453 5453 if (cs->vp == NULL) {
5454 5454 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5455 5455 goto out;
5456 5456 }
5457 5457
5458 5458 sarg.sbp = &sb;
5459 5459 sarg.is_referral = B_FALSE;
5460 5460 nfs4_ntov_table_init(&ntov);
5461 5461 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5462 5462 &sarg, &ntov, NFS4ATTR_VERIT);
5463 5463 if (resp->status != NFS4_OK) {
5464 5464 /*
5465 5465 * do_rfs4_set_attrs will try to verify systemwide attrs,
5466 5466 * so could return -1 for "no match".
5467 5467 */
5468 5468 if (resp->status == -1)
5469 5469 resp->status = NFS4ERR_NOT_SAME;
5470 5470 goto done;
5471 5471 }
5472 5472 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5473 5473 switch (error) {
5474 5474 case 0:
5475 5475 resp->status = NFS4_OK;
5476 5476 break;
5477 5477 case -1:
5478 5478 resp->status = NFS4ERR_NOT_SAME;
5479 5479 break;
5480 5480 default:
5481 5481 resp->status = puterrno4(error);
5482 5482 break;
5483 5483 }
5484 5484 done:
5485 5485 *cs->statusp = resp->status;
5486 5486 nfs4_ntov_table_free(&ntov, &sarg);
5487 5487 out:
5488 5488 DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5489 5489 VERIFY4res *, resp);
5490 5490 }
5491 5491
5492 5492 /* ARGSUSED */
5493 5493 static void
5494 5494 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5495 5495 struct compound_state *cs)
5496 5496 {
5497 5497 /*
5498 5498 * verify and nverify are exactly the same, except that nverify
5499 5499 * succeeds when some argument changed, and verify succeeds when
5500 5500 * when none changed.
5501 5501 */
5502 5502
5503 5503 NVERIFY4args *args = &argop->nfs_argop4_u.opnverify;
5504 5504 NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5505 5505
5506 5506 int error;
5507 5507 struct nfs4_svgetit_arg sarg;
5508 5508 struct statvfs64 sb;
5509 5509 struct nfs4_ntov_table ntov;
5510 5510
5511 5511 DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5512 5512 NVERIFY4args *, args);
5513 5513
5514 5514 if (cs->vp == NULL) {
5515 5515 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5516 5516 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5517 5517 NVERIFY4res *, resp);
5518 5518 return;
5519 5519 }
5520 5520 sarg.sbp = &sb;
5521 5521 sarg.is_referral = B_FALSE;
5522 5522 nfs4_ntov_table_init(&ntov);
5523 5523 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5524 5524 &sarg, &ntov, NFS4ATTR_VERIT);
5525 5525 if (resp->status != NFS4_OK) {
5526 5526 /*
5527 5527 * do_rfs4_set_attrs will try to verify systemwide attrs,
5528 5528 * so could return -1 for "no match".
5529 5529 */
5530 5530 if (resp->status == -1)
5531 5531 resp->status = NFS4_OK;
5532 5532 goto done;
5533 5533 }
5534 5534 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5535 5535 switch (error) {
5536 5536 case 0:
5537 5537 resp->status = NFS4ERR_SAME;
5538 5538 break;
5539 5539 case -1:
5540 5540 resp->status = NFS4_OK;
5541 5541 break;
5542 5542 default:
5543 5543 resp->status = puterrno4(error);
5544 5544 break;
5545 5545 }
5546 5546 done:
5547 5547 *cs->statusp = resp->status;
5548 5548 nfs4_ntov_table_free(&ntov, &sarg);
5549 5549
5550 5550 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5551 5551 NVERIFY4res *, resp);
5552 5552 }
5553 5553
5554 5554 /*
5555 5555 * XXX - This should live in an NFS header file.
5556 5556 */
5557 5557 #define MAX_IOVECS 12
5558 5558
5559 5559 /* ARGSUSED */
5560 5560 static void
5561 5561 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5562 5562 struct compound_state *cs)
5563 5563 {
5564 5564 WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5565 5565 WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5566 5566 int error;
5567 5567 vnode_t *vp;
5568 5568 struct vattr bva;
5569 5569 u_offset_t rlimit;
5570 5570 struct uio uio;
5571 5571 struct iovec iov[MAX_IOVECS];
5572 5572 struct iovec *iovp;
5573 5573 int iovcnt;
5574 5574 int ioflag;
5575 5575 cred_t *savecred, *cr;
5576 5576 bool_t *deleg = &cs->deleg;
5577 5577 nfsstat4 stat;
5578 5578 int in_crit = 0;
5579 5579 caller_context_t ct;
5580 5580 nfs4_srv_t *nsrv4;
5581 5581
5582 5582 DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5583 5583 WRITE4args *, args);
5584 5584
5585 5585 vp = cs->vp;
5586 5586 if (vp == NULL) {
5587 5587 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5588 5588 goto out;
5589 5589 }
5590 5590 if (cs->access == CS_ACCESS_DENIED) {
5591 5591 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5592 5592 goto out;
5593 5593 }
5594 5594
5595 5595 cr = cs->cr;
5596 5596
5597 5597 if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5598 5598 deleg, TRUE, &ct)) != NFS4_OK) {
5599 5599 *cs->statusp = resp->status = stat;
5600 5600 goto out;
5601 5601 }
5602 5602
5603 5603 /*
5604 5604 * We have to enter the critical region before calling VOP_RWLOCK
5605 5605 * to avoid a deadlock with ufs.
5606 5606 */
5607 5607 if (nbl_need_check(vp)) {
5608 5608 nbl_start_crit(vp, RW_READER);
5609 5609 in_crit = 1;
5610 5610 if (nbl_conflict(vp, NBL_WRITE,
5611 5611 args->offset, args->data_len, 0, &ct)) {
5612 5612 *cs->statusp = resp->status = NFS4ERR_LOCKED;
5613 5613 goto out;
5614 5614 }
5615 5615 }
5616 5616
5617 5617 bva.va_mask = AT_MODE | AT_UID;
5618 5618 error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5619 5619
5620 5620 /*
5621 5621 * If we can't get the attributes, then we can't do the
5622 5622 * right access checking. So, we'll fail the request.
5623 5623 */
5624 5624 if (error) {
5625 5625 *cs->statusp = resp->status = puterrno4(error);
5626 5626 goto out;
5627 5627 }
5628 5628
5629 5629 if (rdonly4(req, cs)) {
5630 5630 *cs->statusp = resp->status = NFS4ERR_ROFS;
5631 5631 goto out;
5632 5632 }
5633 5633
5634 5634 if (vp->v_type != VREG) {
5635 5635 *cs->statusp = resp->status =
5636 5636 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5637 5637 goto out;
5638 5638 }
5639 5639
5640 5640 if (crgetuid(cr) != bva.va_uid &&
5641 5641 (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5642 5642 *cs->statusp = resp->status = puterrno4(error);
5643 5643 goto out;
5644 5644 }
5645 5645
5646 5646 if (MANDLOCK(vp, bva.va_mode)) {
5647 5647 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5648 5648 goto out;
5649 5649 }
5650 5650
5651 5651 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
5652 5652 if (args->data_len == 0) {
5653 5653 *cs->statusp = resp->status = NFS4_OK;
5654 5654 resp->count = 0;
5655 5655 resp->committed = args->stable;
5656 5656 resp->writeverf = nsrv4->write4verf;
5657 5657 goto out;
5658 5658 }
5659 5659
5660 5660 if (args->mblk != NULL) {
5661 5661 mblk_t *m;
5662 5662 uint_t bytes, round_len;
5663 5663
5664 5664 iovcnt = 0;
5665 5665 bytes = 0;
5666 5666 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5667 5667 for (m = args->mblk;
5668 5668 m != NULL && bytes < round_len;
5669 5669 m = m->b_cont) {
5670 5670 iovcnt++;
5671 5671 bytes += MBLKL(m);
5672 5672 }
5673 5673 #ifdef DEBUG
5674 5674 /* should have ended on an mblk boundary */
5675 5675 if (bytes != round_len) {
5676 5676 printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5677 5677 bytes, round_len, args->data_len);
5678 5678 printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5679 5679 (void *)args->mblk, (void *)m);
5680 5680 ASSERT(bytes == round_len);
5681 5681 }
5682 5682 #endif
5683 5683 if (iovcnt <= MAX_IOVECS) {
5684 5684 iovp = iov;
5685 5685 } else {
5686 5686 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5687 5687 }
5688 5688 mblk_to_iov(args->mblk, iovcnt, iovp);
5689 5689 } else if (args->rlist != NULL) {
5690 5690 iovcnt = 1;
5691 5691 iovp = iov;
5692 5692 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5693 5693 iovp->iov_len = args->data_len;
5694 5694 } else {
5695 5695 iovcnt = 1;
5696 5696 iovp = iov;
5697 5697 iovp->iov_base = args->data_val;
5698 5698 iovp->iov_len = args->data_len;
5699 5699 }
5700 5700
5701 5701 uio.uio_iov = iovp;
5702 5702 uio.uio_iovcnt = iovcnt;
5703 5703
5704 5704 uio.uio_segflg = UIO_SYSSPACE;
5705 5705 uio.uio_extflg = UIO_COPY_DEFAULT;
5706 5706 uio.uio_loffset = args->offset;
5707 5707 uio.uio_resid = args->data_len;
5708 5708 uio.uio_llimit = curproc->p_fsz_ctl;
5709 5709 rlimit = uio.uio_llimit - args->offset;
5710 5710 if (rlimit < (u_offset_t)uio.uio_resid)
5711 5711 uio.uio_resid = (int)rlimit;
5712 5712
5713 5713 if (args->stable == UNSTABLE4)
5714 5714 ioflag = 0;
5715 5715 else if (args->stable == FILE_SYNC4)
5716 5716 ioflag = FSYNC;
5717 5717 else if (args->stable == DATA_SYNC4)
5718 5718 ioflag = FDSYNC;
5719 5719 else {
5720 5720 if (iovp != iov)
5721 5721 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5722 5722 *cs->statusp = resp->status = NFS4ERR_INVAL;
5723 5723 goto out;
5724 5724 }
5725 5725
5726 5726 /*
5727 5727 * We're changing creds because VM may fault and we need
5728 5728 * the cred of the current thread to be used if quota
5729 5729 * checking is enabled.
5730 5730 */
5731 5731 savecred = curthread->t_cred;
5732 5732 curthread->t_cred = cr;
5733 5733 error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5734 5734 curthread->t_cred = savecred;
5735 5735
5736 5736 if (iovp != iov)
5737 5737 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5738 5738
5739 5739 if (error) {
5740 5740 *cs->statusp = resp->status = puterrno4(error);
5741 5741 goto out;
5742 5742 }
5743 5743
5744 5744 *cs->statusp = resp->status = NFS4_OK;
5745 5745 resp->count = args->data_len - uio.uio_resid;
5746 5746
5747 5747 if (ioflag == 0)
5748 5748 resp->committed = UNSTABLE4;
5749 5749 else
5750 5750 resp->committed = FILE_SYNC4;
5751 5751
5752 5752 resp->writeverf = nsrv4->write4verf;
5753 5753
5754 5754 out:
5755 5755 if (in_crit)
5756 5756 nbl_end_crit(vp);
5757 5757
5758 5758 DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5759 5759 WRITE4res *, resp);
5760 5760 }
5761 5761
5762 5762
5763 5763 /* XXX put in a header file */
5764 5764 extern int sec_svc_getcred(struct svc_req *, cred_t *, caddr_t *, int *);
5765 5765
5766 5766 void
5767 5767 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5768 5768 struct svc_req *req, cred_t *cr, int *rv)
5769 5769 {
5770 5770 uint_t i;
5771 5771 struct compound_state cs;
5772 5772 nfs4_srv_t *nsrv4;
5773 5773 nfs_export_t *ne = nfs_get_export();
5774 5774
5775 5775 if (rv != NULL)
5776 5776 *rv = 0;
5777 5777 rfs4_init_compound_state(&cs);
5778 5778 /*
5779 5779 * Form a reply tag by copying over the request tag.
5780 5780 */
5781 5781 resp->tag.utf8string_val =
5782 5782 kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5783 5783 resp->tag.utf8string_len = args->tag.utf8string_len;
5784 5784 bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5785 5785 resp->tag.utf8string_len);
5786 5786
5787 5787 cs.statusp = &resp->status;
5788 5788 cs.req = req;
5789 5789 resp->array = NULL;
5790 5790 resp->array_len = 0;
5791 5791
5792 5792 /*
5793 5793 * XXX for now, minorversion should be zero
5794 5794 */
5795 5795 if (args->minorversion != NFS4_MINORVERSION) {
5796 5796 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5797 5797 &cs, COMPOUND4args *, args);
5798 5798 resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5799 5799 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5800 5800 &cs, COMPOUND4res *, resp);
5801 5801 return;
5802 5802 }
5803 5803
5804 5804 if (args->array_len == 0) {
5805 5805 resp->status = NFS4_OK;
5806 5806 return;
5807 5807 }
5808 5808
5809 5809 ASSERT(exi == NULL);
5810 5810 ASSERT(cr == NULL);
5811 5811
5812 5812 cr = crget();
5813 5813 ASSERT(cr != NULL);
5814 5814
5815 5815 if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5816 5816 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5817 5817 &cs, COMPOUND4args *, args);
5818 5818 crfree(cr);
5819 5819 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5820 5820 &cs, COMPOUND4res *, resp);
5821 5821 svcerr_badcred(req->rq_xprt);
5822 5822 if (rv != NULL)
5823 5823 *rv = 1;
5824 5824 return;
5825 5825 }
5826 5826 resp->array_len = args->array_len;
5827 5827 resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5828 5828 KM_SLEEP);
5829 5829
5830 5830 cs.basecr = cr;
5831 5831 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
5832 5832
5833 5833 DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
5834 5834 COMPOUND4args *, args);
5835 5835
5836 5836 /*
5837 5837 * For now, NFS4 compound processing must be protected by
5838 5838 * exported_lock because it can access more than one exportinfo
5839 5839 * per compound and share/unshare can now change multiple
5840 5840 * exinfo structs. The NFS2/3 code only refs 1 exportinfo
5841 5841 * per proc (excluding public exinfo), and exi_count design
5842 5842 * is sufficient to protect concurrent execution of NFS2/3
5843 5843 * ops along with unexport. This lock will be removed as
5844 5844 * part of the NFSv4 phase 2 namespace redesign work.
5845 5845 */
5846 5846 rw_enter(&ne->exported_lock, RW_READER);
5847 5847
5848 5848 /*
5849 5849 * If this is the first compound we've seen, we need to start all
5850 5850 * new instances' grace periods.
5851 5851 */
5852 5852 if (nsrv4->seen_first_compound == 0) {
5853 5853 rfs4_grace_start_new(nsrv4);
5854 5854 /*
5855 5855 * This must be set after rfs4_grace_start_new(), otherwise
5856 5856 * another thread could proceed past here before the former
5857 5857 * is finished.
5858 5858 */
5859 5859 nsrv4->seen_first_compound = 1;
5860 5860 }
5861 5861
5862 5862 for (i = 0; i < args->array_len && cs.cont; i++) {
5863 5863 nfs_argop4 *argop;
5864 5864 nfs_resop4 *resop;
5865 5865 uint_t op;
5866 5866
5867 5867 argop = &args->array[i];
5868 5868 resop = &resp->array[i];
5869 5869 resop->resop = argop->argop;
5870 5870 op = (uint_t)resop->resop;
5871 5871
5872 5872 if (op < rfsv4disp_cnt) {
5873 5873 /*
5874 5874 * Count the individual ops here; NULL and COMPOUND
5875 5875 * are counted in common_dispatch()
5876 5876 */
5877 5877 rfsproccnt_v4_ptr[op].value.ui64++;
5878 5878
5879 5879 NFS4_DEBUG(rfs4_debug > 1,
5880 5880 (CE_NOTE, "Executing %s", rfs4_op_string[op]));
5881 5881 (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5882 5882 NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
5883 5883 rfs4_op_string[op], *cs.statusp));
5884 5884 if (*cs.statusp != NFS4_OK)
5885 5885 cs.cont = FALSE;
5886 5886 } else {
5887 5887 /*
5888 5888 * This is effectively dead code since XDR code
5889 5889 * will have already returned BADXDR if op doesn't
5890 5890 * decode to legal value. This only done for a
5891 5891 * day when XDR code doesn't verify v4 opcodes.
5892 5892 */
5893 5893 op = OP_ILLEGAL;
5894 5894 rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
5895 5895
5896 5896 rfs4_op_illegal(argop, resop, req, &cs);
5897 5897 cs.cont = FALSE;
5898 5898 }
5899 5899
5900 5900 /*
5901 5901 * If not at last op, and if we are to stop, then
5902 5902 * compact the results array.
5903 5903 */
5904 5904 if ((i + 1) < args->array_len && !cs.cont) {
5905 5905 nfs_resop4 *new_res = kmem_alloc(
5906 5906 (i+1) * sizeof (nfs_resop4), KM_SLEEP);
5907 5907 bcopy(resp->array,
5908 5908 new_res, (i+1) * sizeof (nfs_resop4));
5909 5909 kmem_free(resp->array,
5910 5910 args->array_len * sizeof (nfs_resop4));
5911 5911
5912 5912 resp->array_len = i + 1;
5913 5913 resp->array = new_res;
5914 5914 }
5915 5915 }
5916 5916
5917 5917 rw_exit(&ne->exported_lock);
5918 5918
5919 5919 /*
5920 5920 * clear exportinfo and vnode fields from compound_state before dtrace
5921 5921 * probe, to avoid tracing residual values for path and share path.
5922 5922 */
5923 5923 if (cs.vp)
5924 5924 VN_RELE(cs.vp);
5925 5925 if (cs.saved_vp)
5926 5926 VN_RELE(cs.saved_vp);
5927 5927 cs.exi = cs.saved_exi = NULL;
5928 5928 cs.vp = cs.saved_vp = NULL;
5929 5929
5930 5930 DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
5931 5931 COMPOUND4res *, resp);
5932 5932
5933 5933 if (cs.saved_fh.nfs_fh4_val)
5934 5934 kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5935 5935
5936 5936 if (cs.basecr)
5937 5937 crfree(cs.basecr);
5938 5938 if (cs.cr)
5939 5939 crfree(cs.cr);
5940 5940 /*
5941 5941 * done with this compound request, free the label
5942 5942 */
5943 5943
5944 5944 if (req->rq_label != NULL) {
5945 5945 kmem_free(req->rq_label, sizeof (bslabel_t));
5946 5946 req->rq_label = NULL;
5947 5947 }
5948 5948 }
5949 5949
5950 5950 /*
5951 5951 * XXX because of what appears to be duplicate calls to rfs4_compound_free
5952 5952 * XXX zero out the tag and array values. Need to investigate why the
5953 5953 * XXX calls occur, but at least prevent the panic for now.
5954 5954 */
5955 5955 void
5956 5956 rfs4_compound_free(COMPOUND4res *resp)
5957 5957 {
5958 5958 uint_t i;
5959 5959
5960 5960 if (resp->tag.utf8string_val) {
5961 5961 UTF8STRING_FREE(resp->tag)
5962 5962 }
5963 5963
5964 5964 for (i = 0; i < resp->array_len; i++) {
5965 5965 nfs_resop4 *resop;
5966 5966 uint_t op;
5967 5967
5968 5968 resop = &resp->array[i];
5969 5969 op = (uint_t)resop->resop;
5970 5970 if (op < rfsv4disp_cnt) {
5971 5971 (*rfsv4disptab[op].dis_resfree)(resop);
5972 5972 }
5973 5973 }
5974 5974 if (resp->array != NULL) {
5975 5975 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
5976 5976 }
5977 5977 }
5978 5978
5979 5979 /*
5980 5980 * Process the value of the compound request rpc flags, as a bit-AND
5981 5981 * of the individual per-op flags (idempotent, allowork, publicfh_ok)
5982 5982 */
5983 5983 void
5984 5984 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
5985 5985 {
5986 5986 int i;
5987 5987 int flag = RPC_ALL;
5988 5988
5989 5989 for (i = 0; flag && i < args->array_len; i++) {
5990 5990 uint_t op;
5991 5991
5992 5992 op = (uint_t)args->array[i].argop;
5993 5993
5994 5994 if (op < rfsv4disp_cnt)
5995 5995 flag &= rfsv4disptab[op].dis_flags;
5996 5996 else
5997 5997 flag = 0;
5998 5998 }
5999 5999 *flagp = flag;
6000 6000 }
6001 6001
6002 6002 nfsstat4
6003 6003 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
6004 6004 {
6005 6005 nfsstat4 e;
6006 6006
6007 6007 rfs4_dbe_lock(cp->rc_dbe);
6008 6008
6009 6009 if (cp->rc_sysidt != LM_NOSYSID) {
6010 6010 *sp = cp->rc_sysidt;
6011 6011 e = NFS4_OK;
6012 6012
6013 6013 } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
6014 6014 *sp = cp->rc_sysidt;
6015 6015 e = NFS4_OK;
6016 6016
6017 6017 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6018 6018 "rfs4_client_sysid: allocated 0x%x\n", *sp));
6019 6019 } else
6020 6020 e = NFS4ERR_DELAY;
6021 6021
6022 6022 rfs4_dbe_unlock(cp->rc_dbe);
6023 6023 return (e);
6024 6024 }
6025 6025
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug-only helper: emit a CE_NOTE line describing a lock request.
 * str is a caller-chosen prefix, operation is the fcntl-style command and
 * flk the lock being described.  A zero l_len is printed as all ones.
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
	char *opname;
	char *tname;
	longlong_t length;

	switch (operation) {
	case F_GETLK:
		opname = "F_GETLK";
		break;
	case F_SETLK:
		opname = "F_SETLK";
		break;
	case F_SETLK_NBMAND:
		opname = "F_SETLK_NBMAND";
		break;
	default:
		opname = "F_UNKNOWN";
		break;
	}

	if (flk->l_type == F_UNLCK)
		tname = "F_UNLCK";
	else if (flk->l_type == F_RDLCK)
		tname = "F_RDLCK";
	else if (flk->l_type == F_WRLCK)
		tname = "F_WRLCK";
	else
		tname = "F_UNKNOWN";

	ASSERT(flk->l_whence == 0);

	length = flk->l_len ? (longlong_t)flk->l_len : ~0LL;
	cmn_err(CE_NOTE, "%s: %s, type = %s, off = %llx len = %llx pid = %d",
	    str, opname, tname, (longlong_t)flk->l_start, length, flk->l_pid);
}

#define	LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f)
#else
#define	LOCK_PRINT(d, s, t, f)
#endif
6062 6062
6063 6063 /*ARGSUSED*/
6064 6064 static bool_t
6065 6065 creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
6066 6066 {
6067 6067 return (TRUE);
6068 6068 }
6069 6069
6070 6070 /*
6071 6071 * Look up the pathname using the vp in cs as the directory vnode.
6072 6072 * cs->vp will be the vnode for the file on success
6073 6073 */
6074 6074
6075 6075 static nfsstat4
6076 6076 rfs4_lookup(component4 *component, struct svc_req *req,
6077 6077 struct compound_state *cs)
6078 6078 {
6079 6079 char *nm;
6080 6080 uint32_t len;
6081 6081 nfsstat4 status;
6082 6082 struct sockaddr *ca;
6083 6083 char *name;
6084 6084
6085 6085 if (cs->vp == NULL) {
6086 6086 return (NFS4ERR_NOFILEHANDLE);
6087 6087 }
6088 6088 if (cs->vp->v_type != VDIR) {
6089 6089 return (NFS4ERR_NOTDIR);
6090 6090 }
6091 6091
6092 6092 status = utf8_dir_verify(component);
6093 6093 if (status != NFS4_OK)
6094 6094 return (status);
6095 6095
6096 6096 nm = utf8_to_fn(component, &len, NULL);
6097 6097 if (nm == NULL) {
6098 6098 return (NFS4ERR_INVAL);
6099 6099 }
6100 6100
6101 6101 if (len > MAXNAMELEN) {
6102 6102 kmem_free(nm, len);
6103 6103 return (NFS4ERR_NAMETOOLONG);
6104 6104 }
6105 6105
6106 6106 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6107 6107 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6108 6108 MAXPATHLEN + 1);
6109 6109
6110 6110 if (name == NULL) {
6111 6111 kmem_free(nm, len);
6112 6112 return (NFS4ERR_INVAL);
6113 6113 }
6114 6114
6115 6115 status = do_rfs4_op_lookup(name, req, cs);
6116 6116
6117 6117 if (name != nm)
6118 6118 kmem_free(name, MAXPATHLEN + 1);
6119 6119
6120 6120 kmem_free(nm, len);
6121 6121
6122 6122 return (status);
6123 6123 }
6124 6124
6125 6125 static nfsstat4
6126 6126 rfs4_lookupfile(component4 *component, struct svc_req *req,
6127 6127 struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6128 6128 {
6129 6129 nfsstat4 status;
6130 6130 vnode_t *dvp = cs->vp;
6131 6131 vattr_t bva, ava, fva;
6132 6132 int error;
6133 6133
6134 6134 /* Get "before" change value */
6135 6135 bva.va_mask = AT_CTIME|AT_SEQ;
6136 6136 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6137 6137 if (error)
6138 6138 return (puterrno4(error));
6139 6139
6140 6140 /* rfs4_lookup may VN_RELE directory */
6141 6141 VN_HOLD(dvp);
6142 6142
6143 6143 status = rfs4_lookup(component, req, cs);
6144 6144 if (status != NFS4_OK) {
6145 6145 VN_RELE(dvp);
6146 6146 return (status);
6147 6147 }
6148 6148
6149 6149 /*
6150 6150 * Get "after" change value, if it fails, simply return the
6151 6151 * before value.
6152 6152 */
6153 6153 ava.va_mask = AT_CTIME|AT_SEQ;
6154 6154 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6155 6155 ava.va_ctime = bva.va_ctime;
6156 6156 ava.va_seq = 0;
6157 6157 }
6158 6158 VN_RELE(dvp);
6159 6159
6160 6160 /*
6161 6161 * Validate the file is a file
6162 6162 */
6163 6163 fva.va_mask = AT_TYPE|AT_MODE;
6164 6164 error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6165 6165 if (error)
6166 6166 return (puterrno4(error));
6167 6167
6168 6168 if (fva.va_type != VREG) {
6169 6169 if (fva.va_type == VDIR)
6170 6170 return (NFS4ERR_ISDIR);
6171 6171 if (fva.va_type == VLNK)
6172 6172 return (NFS4ERR_SYMLINK);
6173 6173 return (NFS4ERR_INVAL);
6174 6174 }
6175 6175
6176 6176 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6177 6177 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6178 6178
6179 6179 /*
6180 6180 * It is undefined if VOP_LOOKUP will change va_seq, so
6181 6181 * cinfo.atomic = TRUE only if we have
6182 6182 * non-zero va_seq's, and they have not changed.
6183 6183 */
6184 6184 if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6185 6185 cinfo->atomic = TRUE;
6186 6186 else
6187 6187 cinfo->atomic = FALSE;
6188 6188
6189 6189 /* Check for mandatory locking */
6190 6190 cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6191 6191 return (check_open_access(access, cs, req));
6192 6192 }
6193 6193
6194 6194 static nfsstat4
6195 6195 create_vnode(vnode_t *dvp, char *nm, vattr_t *vap, createmode4 mode,
6196 6196 cred_t *cr, vnode_t **vpp, bool_t *created)
6197 6197 {
6198 6198 int error;
6199 6199 nfsstat4 status = NFS4_OK;
6200 6200 vattr_t va;
6201 6201
6202 6202 tryagain:
6203 6203
6204 6204 /*
6205 6205 * The file open mode used is VWRITE. If the client needs
6206 6206 * some other semantic, then it should do the access checking
6207 6207 * itself. It would have been nice to have the file open mode
6208 6208 * passed as part of the arguments.
6209 6209 */
6210 6210
6211 6211 *created = TRUE;
6212 6212 error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6213 6213
6214 6214 if (error) {
6215 6215 *created = FALSE;
6216 6216
6217 6217 /*
6218 6218 * If we got something other than file already exists
6219 6219 * then just return this error. Otherwise, we got
6220 6220 * EEXIST. If we were doing a GUARDED create, then
6221 6221 * just return this error. Otherwise, we need to
6222 6222 * make sure that this wasn't a duplicate of an
6223 6223 * exclusive create request.
6224 6224 *
6225 6225 * The assumption is made that a non-exclusive create
6226 6226 * request will never return EEXIST.
6227 6227 */
6228 6228
6229 6229 if (error != EEXIST || mode == GUARDED4) {
6230 6230 status = puterrno4(error);
6231 6231 return (status);
6232 6232 }
6233 6233 error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6234 6234 NULL, NULL, NULL);
6235 6235
6236 6236 if (error) {
6237 6237 /*
6238 6238 * We couldn't find the file that we thought that
6239 6239 * we just created. So, we'll just try creating
6240 6240 * it again.
6241 6241 */
6242 6242 if (error == ENOENT)
6243 6243 goto tryagain;
6244 6244
6245 6245 status = puterrno4(error);
6246 6246 return (status);
6247 6247 }
6248 6248
6249 6249 if (mode == UNCHECKED4) {
6250 6250 /* existing object must be regular file */
6251 6251 if ((*vpp)->v_type != VREG) {
6252 6252 if ((*vpp)->v_type == VDIR)
6253 6253 status = NFS4ERR_ISDIR;
6254 6254 else if ((*vpp)->v_type == VLNK)
6255 6255 status = NFS4ERR_SYMLINK;
6256 6256 else
6257 6257 status = NFS4ERR_INVAL;
6258 6258 VN_RELE(*vpp);
6259 6259 return (status);
6260 6260 }
6261 6261
6262 6262 return (NFS4_OK);
6263 6263 }
6264 6264
6265 6265 /* Check for duplicate request */
6266 6266 va.va_mask = AT_MTIME;
6267 6267 error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6268 6268 if (!error) {
6269 6269 /* We found the file */
6270 6270 const timestruc_t *mtime = &vap->va_mtime;
6271 6271
6272 6272 if (va.va_mtime.tv_sec != mtime->tv_sec ||
6273 6273 va.va_mtime.tv_nsec != mtime->tv_nsec) {
6274 6274 /* but its not our creation */
6275 6275 VN_RELE(*vpp);
6276 6276 return (NFS4ERR_EXIST);
6277 6277 }
6278 6278 *created = TRUE; /* retrans of create == created */
6279 6279 return (NFS4_OK);
6280 6280 }
6281 6281 VN_RELE(*vpp);
6282 6282 return (NFS4ERR_EXIST);
6283 6283 }
6284 6284
6285 6285 return (NFS4_OK);
6286 6286 }
6287 6287
6288 6288 static nfsstat4
6289 6289 check_open_access(uint32_t access, struct compound_state *cs,
6290 6290 struct svc_req *req)
6291 6291 {
6292 6292 int error;
6293 6293 vnode_t *vp;
6294 6294 bool_t readonly;
6295 6295 cred_t *cr = cs->cr;
6296 6296
6297 6297 /* For now we don't allow mandatory locking as per V2/V3 */
6298 6298 if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6299 6299 return (NFS4ERR_ACCESS);
6300 6300 }
6301 6301
6302 6302 vp = cs->vp;
6303 6303 ASSERT(cr != NULL && vp->v_type == VREG);
6304 6304
6305 6305 /*
6306 6306 * If the file system is exported read only and we are trying
6307 6307 * to open for write, then return NFS4ERR_ROFS
6308 6308 */
6309 6309
6310 6310 readonly = rdonly4(req, cs);
6311 6311
6312 6312 if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6313 6313 return (NFS4ERR_ROFS);
6314 6314
6315 6315 if (access & OPEN4_SHARE_ACCESS_READ) {
6316 6316 if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6317 6317 (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6318 6318 return (NFS4ERR_ACCESS);
6319 6319 }
6320 6320 }
6321 6321
6322 6322 if (access & OPEN4_SHARE_ACCESS_WRITE) {
6323 6323 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6324 6324 if (error)
6325 6325 return (NFS4ERR_ACCESS);
6326 6326 }
6327 6327
6328 6328 return (NFS4_OK);
6329 6329 }
6330 6330
6331 6331 static nfsstat4
6332 6332 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6333 6333 change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6334 6334 {
6335 6335 struct nfs4_svgetit_arg sarg;
6336 6336 struct nfs4_ntov_table ntov;
6337 6337
6338 6338 bool_t ntov_table_init = FALSE;
6339 6339 struct statvfs64 sb;
6340 6340 nfsstat4 status;
6341 6341 vnode_t *vp;
6342 6342 vattr_t bva, ava, iva, cva, *vap;
6343 6343 vnode_t *dvp;
6344 6344 timespec32_t *mtime;
6345 6345 char *nm = NULL;
6346 6346 uint_t buflen;
6347 6347 bool_t created;
6348 6348 bool_t setsize = FALSE;
6349 6349 len_t reqsize;
6350 6350 int error;
6351 6351 bool_t trunc;
6352 6352 caller_context_t ct;
6353 6353 component4 *component;
6354 6354 bslabel_t *clabel;
6355 6355 struct sockaddr *ca;
6356 6356 char *name = NULL;
6357 6357
6358 6358 sarg.sbp = &sb;
6359 6359 sarg.is_referral = B_FALSE;
6360 6360
6361 6361 dvp = cs->vp;
6362 6362
6363 6363 /* Check if the file system is read only */
6364 6364 if (rdonly4(req, cs))
6365 6365 return (NFS4ERR_ROFS);
6366 6366
6367 6367 /* check the label of including directory */
6368 6368 if (is_system_labeled()) {
6369 6369 ASSERT(req->rq_label != NULL);
6370 6370 clabel = req->rq_label;
6371 6371 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6372 6372 "got client label from request(1)",
6373 6373 struct svc_req *, req);
6374 6374 if (!blequal(&l_admin_low->tsl_label, clabel)) {
6375 6375 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6376 6376 cs->exi)) {
6377 6377 return (NFS4ERR_ACCESS);
6378 6378 }
6379 6379 }
6380 6380 }
6381 6381
6382 6382 /*
6383 6383 * Get the last component of path name in nm. cs will reference
6384 6384 * the including directory on success.
6385 6385 */
6386 6386 component = &args->open_claim4_u.file;
6387 6387 status = utf8_dir_verify(component);
6388 6388 if (status != NFS4_OK)
6389 6389 return (status);
6390 6390
6391 6391 nm = utf8_to_fn(component, &buflen, NULL);
6392 6392
6393 6393 if (nm == NULL)
6394 6394 return (NFS4ERR_RESOURCE);
6395 6395
6396 6396 if (buflen > MAXNAMELEN) {
6397 6397 kmem_free(nm, buflen);
6398 6398 return (NFS4ERR_NAMETOOLONG);
6399 6399 }
6400 6400
6401 6401 bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6402 6402 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6403 6403 if (error) {
6404 6404 kmem_free(nm, buflen);
6405 6405 return (puterrno4(error));
6406 6406 }
6407 6407
6408 6408 if (bva.va_type != VDIR) {
6409 6409 kmem_free(nm, buflen);
6410 6410 return (NFS4ERR_NOTDIR);
6411 6411 }
6412 6412
6413 6413 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6414 6414
6415 6415 switch (args->mode) {
6416 6416 case GUARDED4:
6417 6417 /*FALLTHROUGH*/
6418 6418 case UNCHECKED4:
6419 6419 nfs4_ntov_table_init(&ntov);
6420 6420 ntov_table_init = TRUE;
6421 6421
6422 6422 *attrset = 0;
6423 6423 status = do_rfs4_set_attrs(attrset,
6424 6424 &args->createhow4_u.createattrs,
6425 6425 cs, &sarg, &ntov, NFS4ATTR_SETIT);
6426 6426
6427 6427 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6428 6428 sarg.vap->va_type != VREG) {
6429 6429 if (sarg.vap->va_type == VDIR)
6430 6430 status = NFS4ERR_ISDIR;
6431 6431 else if (sarg.vap->va_type == VLNK)
6432 6432 status = NFS4ERR_SYMLINK;
6433 6433 else
6434 6434 status = NFS4ERR_INVAL;
6435 6435 }
6436 6436
6437 6437 if (status != NFS4_OK) {
6438 6438 kmem_free(nm, buflen);
6439 6439 nfs4_ntov_table_free(&ntov, &sarg);
6440 6440 *attrset = 0;
6441 6441 return (status);
6442 6442 }
6443 6443
6444 6444 vap = sarg.vap;
6445 6445 vap->va_type = VREG;
6446 6446 vap->va_mask |= AT_TYPE;
6447 6447
6448 6448 if ((vap->va_mask & AT_MODE) == 0) {
6449 6449 vap->va_mask |= AT_MODE;
6450 6450 vap->va_mode = (mode_t)0600;
6451 6451 }
6452 6452
6453 6453 if (vap->va_mask & AT_SIZE) {
6454 6454
6455 6455 /* Disallow create with a non-zero size */
6456 6456
6457 6457 if ((reqsize = sarg.vap->va_size) != 0) {
6458 6458 kmem_free(nm, buflen);
6459 6459 nfs4_ntov_table_free(&ntov, &sarg);
6460 6460 *attrset = 0;
6461 6461 return (NFS4ERR_INVAL);
6462 6462 }
6463 6463 setsize = TRUE;
6464 6464 }
6465 6465 break;
6466 6466
6467 6467 case EXCLUSIVE4:
6468 6468 /* prohibit EXCL create of named attributes */
6469 6469 if (dvp->v_flag & V_XATTRDIR) {
6470 6470 kmem_free(nm, buflen);
6471 6471 *attrset = 0;
6472 6472 return (NFS4ERR_INVAL);
6473 6473 }
6474 6474
6475 6475 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6476 6476 cva.va_type = VREG;
6477 6477 /*
6478 6478 * Ensure no time overflows. Assumes underlying
6479 6479 * filesystem supports at least 32 bits.
6480 6480 * Truncate nsec to usec resolution to allow valid
6481 6481 * compares even if the underlying filesystem truncates.
6482 6482 */
6483 6483 mtime = (timespec32_t *)&args->createhow4_u.createverf;
6484 6484 cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6485 6485 cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6486 6486 cva.va_mode = (mode_t)0;
6487 6487 vap = &cva;
6488 6488
6489 6489 /*
6490 6490 * For EXCL create, attrset is set to the server attr
6491 6491 * used to cache the client's verifier.
6492 6492 */
6493 6493 *attrset = FATTR4_TIME_MODIFY_MASK;
6494 6494 break;
6495 6495 }
6496 6496
6497 6497 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6498 6498 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6499 6499 MAXPATHLEN + 1);
6500 6500
6501 6501 if (name == NULL) {
6502 6502 kmem_free(nm, buflen);
6503 6503 return (NFS4ERR_SERVERFAULT);
6504 6504 }
6505 6505
6506 6506 status = create_vnode(dvp, name, vap, args->mode,
6507 6507 cs->cr, &vp, &created);
6508 6508 if (nm != name)
6509 6509 kmem_free(name, MAXPATHLEN + 1);
6510 6510 kmem_free(nm, buflen);
6511 6511
6512 6512 if (status != NFS4_OK) {
6513 6513 if (ntov_table_init)
6514 6514 nfs4_ntov_table_free(&ntov, &sarg);
6515 6515 *attrset = 0;
6516 6516 return (status);
6517 6517 }
6518 6518
6519 6519 trunc = (setsize && !created);
6520 6520
6521 6521 if (args->mode != EXCLUSIVE4) {
6522 6522 bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6523 6523
6524 6524 /*
6525 6525 * True verification that object was created with correct
6526 6526 * attrs is impossible. The attrs could have been changed
6527 6527 * immediately after object creation. If attributes did
6528 6528 * not verify, the only recourse for the server is to
6529 6529 * destroy the object. Maybe if some attrs (like gid)
6530 6530 * are set incorrectly, the object should be destroyed;
6531 6531 * however, seems bad as a default policy. Do we really
6532 6532 * want to destroy an object over one of the times not
6533 6533 * verifying correctly? For these reasons, the server
6534 6534 * currently sets bits in attrset for createattrs
6535 6535 * that were set; however, no verification is done.
6536 6536 *
6537 6537 * vmask_to_nmask accounts for vattr bits set on create
6538 6538 * [do_rfs4_set_attrs() only sets resp bits for
6539 6539 * non-vattr/vfs bits.]
6540 6540 * Mask off any bits we set by default so as not to return
6541 6541 * more attrset bits than were requested in createattrs
6542 6542 */
6543 6543 if (created) {
6544 6544 nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6545 6545 *attrset &= createmask;
6546 6546 } else {
6547 6547 /*
6548 6548 * We did not create the vnode (we tried but it
6549 6549 * already existed). In this case, the only createattr
6550 6550 * that the spec allows the server to set is size,
6551 6551 * and even then, it can only be set if it is 0.
6552 6552 */
6553 6553 *attrset = 0;
6554 6554 if (trunc)
6555 6555 *attrset = FATTR4_SIZE_MASK;
6556 6556 }
6557 6557 }
6558 6558 if (ntov_table_init)
6559 6559 nfs4_ntov_table_free(&ntov, &sarg);
6560 6560
6561 6561 /*
6562 6562 * Get the initial "after" sequence number, if it fails,
6563 6563 * set to zero, time to before.
6564 6564 */
6565 6565 iva.va_mask = AT_CTIME|AT_SEQ;
6566 6566 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6567 6567 iva.va_seq = 0;
6568 6568 iva.va_ctime = bva.va_ctime;
6569 6569 }
6570 6570
6571 6571 /*
6572 6572 * create_vnode attempts to create the file exclusive,
6573 6573 * if it already exists the VOP_CREATE will fail and
6574 6574 * may not increase va_seq. It is atomic if
6575 6575 * we haven't changed the directory, but if it has changed
6576 6576 * we don't know what changed it.
6577 6577 */
6578 6578 if (!created) {
6579 6579 if (bva.va_seq && iva.va_seq &&
6580 6580 bva.va_seq == iva.va_seq)
6581 6581 cinfo->atomic = TRUE;
6582 6582 else
6583 6583 cinfo->atomic = FALSE;
6584 6584 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6585 6585 } else {
6586 6586 /*
6587 6587 * The entry was created, we need to sync the
6588 6588 * directory metadata.
6589 6589 */
6590 6590 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6591 6591
6592 6592 /*
6593 6593 * Get "after" change value, if it fails, simply return the
6594 6594 * before value.
6595 6595 */
6596 6596 ava.va_mask = AT_CTIME|AT_SEQ;
6597 6597 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6598 6598 ava.va_ctime = bva.va_ctime;
6599 6599 ava.va_seq = 0;
6600 6600 }
6601 6601
6602 6602 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6603 6603
6604 6604 /*
6605 6605 * The cinfo->atomic = TRUE only if we have
6606 6606 * non-zero va_seq's, and it has incremented by exactly one
6607 6607 * during the create_vnode and it didn't
6608 6608 * change during the VOP_FSYNC.
6609 6609 */
6610 6610 if (bva.va_seq && iva.va_seq && ava.va_seq &&
6611 6611 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6612 6612 cinfo->atomic = TRUE;
6613 6613 else
6614 6614 cinfo->atomic = FALSE;
6615 6615 }
6616 6616
6617 6617 /* Check for mandatory locking and that the size gets set. */
6618 6618 cva.va_mask = AT_MODE;
6619 6619 if (setsize)
6620 6620 cva.va_mask |= AT_SIZE;
6621 6621
6622 6622 /* Assume the worst */
6623 6623 cs->mandlock = TRUE;
6624 6624
6625 6625 if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6626 6626 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6627 6627
6628 6628 /*
6629 6629 * Truncate the file if necessary; this would be
6630 6630 * the case for create over an existing file.
6631 6631 */
6632 6632
6633 6633 if (trunc) {
6634 6634 int in_crit = 0;
6635 6635 rfs4_file_t *fp;
6636 6636 nfs4_srv_t *nsrv4;
6637 6637 bool_t create = FALSE;
6638 6638
6639 6639 /*
6640 6640 * We are writing over an existing file.
6641 6641 * Check to see if we need to recall a delegation.
6642 6642 */
6643 6643 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
6644 6644 rfs4_hold_deleg_policy(nsrv4);
6645 6645 if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6646 6646 if (rfs4_check_delegated_byfp(FWRITE, fp,
6647 6647 (reqsize == 0), FALSE, FALSE, &clientid)) {
6648 6648 rfs4_file_rele(fp);
6649 6649 rfs4_rele_deleg_policy(nsrv4);
6650 6650 VN_RELE(vp);
6651 6651 *attrset = 0;
6652 6652 return (NFS4ERR_DELAY);
6653 6653 }
6654 6654 rfs4_file_rele(fp);
6655 6655 }
6656 6656 rfs4_rele_deleg_policy(nsrv4);
6657 6657
6658 6658 if (nbl_need_check(vp)) {
6659 6659 in_crit = 1;
6660 6660
6661 6661 ASSERT(reqsize == 0);
6662 6662
6663 6663 nbl_start_crit(vp, RW_READER);
6664 6664 if (nbl_conflict(vp, NBL_WRITE, 0,
6665 6665 cva.va_size, 0, NULL)) {
6666 6666 in_crit = 0;
6667 6667 nbl_end_crit(vp);
6668 6668 VN_RELE(vp);
6669 6669 *attrset = 0;
6670 6670 return (NFS4ERR_ACCESS);
6671 6671 }
6672 6672 }
6673 6673 ct.cc_sysid = 0;
6674 6674 ct.cc_pid = 0;
6675 6675 ct.cc_caller_id = nfs4_srv_caller_id;
6676 6676 ct.cc_flags = CC_DONTBLOCK;
6677 6677
6678 6678 cva.va_mask = AT_SIZE;
6679 6679 cva.va_size = reqsize;
6680 6680 (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6681 6681 if (in_crit)
6682 6682 nbl_end_crit(vp);
6683 6683 }
6684 6684 }
6685 6685
6686 6686 error = makefh4(&cs->fh, vp, cs->exi);
6687 6687
6688 6688 /*
6689 6689 * Force modified data and metadata out to stable storage.
6690 6690 */
6691 6691 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6692 6692
6693 6693 if (error) {
6694 6694 VN_RELE(vp);
6695 6695 *attrset = 0;
6696 6696 return (puterrno4(error));
6697 6697 }
6698 6698
6699 6699 /* if parent dir is attrdir, set namedattr fh flag */
6700 6700 if (dvp->v_flag & V_XATTRDIR)
6701 6701 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6702 6702
6703 6703 if (cs->vp)
6704 6704 VN_RELE(cs->vp);
6705 6705
6706 6706 cs->vp = vp;
6707 6707
6708 6708 /*
6709 6709 * if we did not create the file, we will need to check
6710 6710 * the access bits on the file
6711 6711 */
6712 6712
6713 6713 if (!created) {
6714 6714 if (setsize)
6715 6715 args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6716 6716 status = check_open_access(args->share_access, cs, req);
6717 6717 if (status != NFS4_OK)
6718 6718 *attrset = 0;
6719 6719 }
6720 6720 return (status);
6721 6721 }
6722 6722
/*
 * rfs4_do_open: common back end used by the OPEN claim handlers in this
 * file (rfs4_do_opennull, rfs4_do_openprev, rfs4_do_opendelcur).
 *
 * Looks up the rfs4_file_t for cs->vp (rfs4_findfile_withlock) and the open
 * state for the (oo, file) pair (rfs4_findstate_by_owner_file), registers
 * newly requested share bits via rfs4_share(), deals with a conflicting
 * delegation via rfs4_recall_deleg(), and then calls VOP_OPEN() for a first
 * open that is not CLAIM_DELEGATE_CUR, or vn_open_upgrade() otherwise.
 * On success the per-state and per-file access/deny bookkeeping is updated,
 * rfs4_grant_delegation() is consulted, the stateid is advanced and copied
 * to resp->stateid, and resp->status is set to NFS4_OK.
 *
 *	cs		compound state; cs->vp and cs->fh identify the file
 *	req		currently unused
 *	oo		open owner performing the open
 *	deleg		delegation request handed to rfs4_grant_delegation()
 *	access		requested OPEN4_SHARE_ACCESS_* bits
 *	deny		requested OPEN4_SHARE_DENY_* bits
 *	resp		result; status, stateid and delegation are filled in
 *	deleg_cur	non-zero: never call VOP_OPEN(), only do the upgrade
 *
 * Failure statuses stored in resp->status by this function:
 *	NFS4ERR_RESOURCE	file or state lookup returned NULL
 *	(rfs4_client_sysid)	whatever non-OK status that call returned
 *	(rfs4_share)		whatever non-zero value that call returned
 *	NFS4ERR_OLD_STATEID	state was closed while locks were dropped
 *	NFS4ERR_DELAY		recall still required after the delay, or
 *				VOP_OPEN() failed with EAGAIN + CC_WOULDBLOCK
 *	NFS4ERR_SERVERFAULT	any other VOP_OPEN() failure
 *
 * Every failure path after the state lookup releases both fp and sp, and
 * calls rfs4_state_close() on sp when screate is TRUE.
 */
6723 6723 /*ARGSUSED*/
6724 6724 static void
6725 6725 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
6726 6726 rfs4_openowner_t *oo, delegreq_t deleg,
6727 6727 uint32_t access, uint32_t deny,
6728 6728 OPEN4res *resp, int deleg_cur)
6729 6729 {
6730 6730 /* XXX Currently not using req */
6731 6731 rfs4_state_t *sp;
6732 6732 rfs4_file_t *fp;
6733 6733 bool_t screate = TRUE;
6734 6734 bool_t fcreate = TRUE;
6735 6735 uint32_t open_a, share_a;
6736 6736 uint32_t open_d, share_d;
6737 6737 rfs4_deleg_state_t *dsp;
6738 6738 sysid_t sysid;
6739 6739 nfsstat4 status;
6740 6740 caller_context_t ct;
6741 6741 int fflags = 0;
6742 6742 int recall = 0;
6743 6743 int err;
6744 6744 int first_open;
6745 6745
6746 6746 /* get the file struct and hold a lock on it during initial open */
6747 6747 fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
6748 6748 if (fp == NULL) {
6749 6749 resp->status = NFS4ERR_RESOURCE;
6750 6750 DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
6751 6751 return;
6752 6752 }
6753 6753
6754 6754 sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
6755 6755 if (sp == NULL) {
6756 6756 resp->status = NFS4ERR_RESOURCE;
6757 6757 DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
6758 6758 /* No need to keep any reference */
6759 6759 rw_exit(&fp->rf_file_rwlock);
6760 6760 rfs4_file_rele(fp);
6761 6761 return;
6762 6762 }
6763 6763
6764 6764 /* try to get the sysid before continuing */
6765 6765 if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
6766 6766 resp->status = status;
6767 6767 rfs4_file_rele(fp);
6768 6768 /* Not a fully formed open; "close" it */
6769 6769 if (screate == TRUE)
6770 6770 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6771 6771 rfs4_state_rele(sp);
6772 6772 return;
6773 6773 }
6774 6774
6775 6775 /* Calculate the fflags for this OPEN. */
6776 6776 if (access & OPEN4_SHARE_ACCESS_READ)
6777 6777 fflags |= FREAD;
6778 6778 if (access & OPEN4_SHARE_ACCESS_WRITE)
6779 6779 fflags |= FWRITE;
6780 6780
6781 6781 rfs4_dbe_lock(sp->rs_dbe);
6782 6782
6783 6783 /*
6784 6784 * Calculate the new deny and access mode that this open is adding to
6785 6785 * the file for this open owner;
6786 6786 */
6787 6787 open_d = (deny & ~sp->rs_open_deny);
6788 6788 open_a = (access & ~sp->rs_open_access);
6789 6789
6790 6790 /*
6791 6791 * Calculate the new share access and share deny modes that this open
6792 6792 * is adding to the file for this open owner;
6793 6793 */
6794 6794 share_a = (access & ~sp->rs_share_access);
6795 6795 share_d = (deny & ~sp->rs_share_deny);
6796 6796
/* first_open: no READ or WRITE access bit is recorded in this state yet */
6797 6797 first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
6798 6798
6799 6799 /*
6800 6800 * Check to see the client has already sent an open for this
6801 6801 * open owner on this file with the same share/deny modes.
6802 6802 * If so, we don't need to check for a conflict and we don't
6803 6803 * need to add another shrlock. If not, then we need to
6804 6804 * check for conflicts in deny and access before checking for
6805 6805 * conflicts in delegation. We don't want to recall a
6806 6806 * delegation based on an open that will eventually fail based
6807 6807 * on shares modes.
6808 6808 */
6809 6809
6810 6810 if (share_a || share_d) {
6811 6811 if ((err = rfs4_share(sp, access, deny)) != 0) {
6812 6812 rfs4_dbe_unlock(sp->rs_dbe);
6813 6813 resp->status = err;
6814 6814
6815 6815 rfs4_file_rele(fp);
6816 6816 /* Not a fully formed open; "close" it */
6817 6817 if (screate == TRUE)
6818 6818 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6819 6819 rfs4_state_rele(sp);
6820 6820 return;
6821 6821 }
6822 6822 }
6823 6823
6824 6824 rfs4_dbe_lock(fp->rf_dbe);
6825 6825
6826 6826 /*
6827 6827 * Check to see if this file is delegated and if so, if a
6828 6828 * recall needs to be done.
6829 6829 */
6830 6830 if (rfs4_check_recall(sp, access)) {
/*
 * Both dbe locks are dropped across the recall and the delay; only the
 * state's lock is retaken before rs_closed is re-examined.
 */
6831 6831 rfs4_dbe_unlock(fp->rf_dbe);
6832 6832 rfs4_dbe_unlock(sp->rs_dbe);
6833 6833 rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
6834 6834 delay(NFS4_DELEGATION_CONFLICT_DELAY);
6835 6835 rfs4_dbe_lock(sp->rs_dbe);
6836 6836
6837 6837 /* if state closed while lock was dropped */
6838 6838 if (sp->rs_closed) {
6839 6839 if (share_a || share_d)
6840 6840 (void) rfs4_unshare(sp);
6841 6841 rfs4_dbe_unlock(sp->rs_dbe);
6842 6842 rfs4_file_rele(fp);
6843 6843 /* Not a fully formed open; "close" it */
6844 6844 if (screate == TRUE)
6845 6845 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6846 6846 rfs4_state_rele(sp);
6847 6847 resp->status = NFS4ERR_OLD_STATEID;
6848 6848 return;
6849 6849 }
6850 6850
6851 6851 rfs4_dbe_lock(fp->rf_dbe);
6852 6852 /* Let's see if the delegation was returned */
6853 6853 if (rfs4_check_recall(sp, access)) {
6854 6854 rfs4_dbe_unlock(fp->rf_dbe);
6855 6855 if (share_a || share_d)
6856 6856 (void) rfs4_unshare(sp);
6857 6857 rfs4_dbe_unlock(sp->rs_dbe);
6858 6858 rfs4_file_rele(fp);
6859 6859 rfs4_update_lease(sp->rs_owner->ro_client);
6860 6860
6861 6861 /* Not a fully formed open; "close" it */
6862 6862 if (screate == TRUE)
6863 6863 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6864 6864 rfs4_state_rele(sp);
6865 6865 resp->status = NFS4ERR_DELAY;
6866 6866 return;
6867 6867 }
6868 6868 }
6869 6869 /*
6870 6870 * the share check passed and any delegation conflict has been
6871 6871 * taken care of, now call vop_open.
6872 6872 * if this is the first open then call vop_open with fflags.
6873 6873 * if not, call vn_open_upgrade with just the upgrade flags.
6874 6874 *
6875 6875 * if the file has been opened already, it will have the current
6876 6876 * access mode in the state struct. if it has no share access, then
6877 6877 * this is a new open.
6878 6878 *
6879 6879 * However, if this is open with CLAIM_DELEGATE_CUR, then don't
6880 6880 * call VOP_OPEN(), just do the open upgrade.
6881 6881 */
6882 6882 if (first_open && !deleg_cur) {
6883 6883 ct.cc_sysid = sysid;
6884 6884 ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
6885 6885 ct.cc_caller_id = nfs4_srv_caller_id;
6886 6886 ct.cc_flags = CC_DONTBLOCK;
6887 6887 err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
6888 6888 if (err) {
6889 6889 rfs4_dbe_unlock(fp->rf_dbe);
6890 6890 if (share_a || share_d)
6891 6891 (void) rfs4_unshare(sp);
6892 6892 rfs4_dbe_unlock(sp->rs_dbe);
6893 6893 rfs4_file_rele(fp);
6894 6894
6895 6895 /* Not a fully formed open; "close" it */
6896 6896 if (screate == TRUE)
6897 6897 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6898 6898 rfs4_state_rele(sp);
6899 6899 /* check if a monitor detected a delegation conflict */
6900 6900 if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
6901 6901 resp->status = NFS4ERR_DELAY;
6902 6902 else
6903 6903 resp->status = NFS4ERR_SERVERFAULT;
6904 6904 return;
6905 6905 }
6906 6906 } else { /* open upgrade */
6907 6907 /*
6908 6908 * calculate the fflags for the new mode that is being added
6909 6909 * by this upgrade.
6910 6910 */
6911 6911 fflags = 0;
6912 6912 if (open_a & OPEN4_SHARE_ACCESS_READ)
6913 6913 fflags |= FREAD;
6914 6914 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6915 6915 fflags |= FWRITE;
6916 6916 vn_open_upgrade(cs->vp, fflags);
6917 6917 }
6918 6918 sp->rs_open_access |= access;
6919 6919 sp->rs_open_deny |= deny;
6920 6920
6921 6921 if (open_d & OPEN4_SHARE_DENY_READ)
6922 6922 fp->rf_deny_read++;
6923 6923 if (open_d & OPEN4_SHARE_DENY_WRITE)
6924 6924 fp->rf_deny_write++;
6925 6925 fp->rf_share_deny |= deny;
6926 6926
6927 6927 if (open_a & OPEN4_SHARE_ACCESS_READ)
6928 6928 fp->rf_access_read++;
6929 6929 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6930 6930 fp->rf_access_write++;
6931 6931 fp->rf_share_access |= access;
6932 6932
6933 6933 /*
6934 6934 * Check for delegation here. if the deleg argument is not
6935 6935 * DELEG_ANY, then this is a reclaim from a client and
6936 6936 * we must honor the delegation requested. If necessary we can
6937 6937 * set the recall flag.
6938 6938 */
6939 6939
6940 6940 dsp = rfs4_grant_delegation(deleg, sp, &recall);
6941 6941
6942 6942 cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
6943 6943
6944 6944 next_stateid(&sp->rs_stateid);
6945 6945
6946 6946 resp->stateid = sp->rs_stateid.stateid;
6947 6947
6948 6948 rfs4_dbe_unlock(fp->rf_dbe);
6949 6949 rfs4_dbe_unlock(sp->rs_dbe);
6950 6950
6951 6951 if (dsp) {
6952 6952 rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
6953 6953 rfs4_deleg_state_rele(dsp);
6954 6954 }
6955 6955
6956 6956 rfs4_file_rele(fp);
6957 6957 rfs4_state_rele(sp);
6958 6958
6959 6959 resp->status = NFS4_OK;
6960 6960 }
6961 6961
6962 6962 /*ARGSUSED*/
6963 6963 static void
6964 6964 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6965 6965 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6966 6966 {
6967 6967 change_info4 *cinfo = &resp->cinfo;
6968 6968 bitmap4 *attrset = &resp->attrset;
6969 6969
6970 6970 if (args->opentype == OPEN4_NOCREATE)
6971 6971 resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
6972 6972 req, cs, args->share_access, cinfo);
6973 6973 else {
6974 6974 /* inhibit delegation grants during exclusive create */
6975 6975
6976 6976 if (args->mode == EXCLUSIVE4)
6977 6977 rfs4_disable_delegation();
6978 6978
6979 6979 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
6980 6980 oo->ro_client->rc_clientid);
6981 6981 }
6982 6982
6983 6983 if (resp->status == NFS4_OK) {
6984 6984
6985 6985 /* cs->vp cs->fh now reference the desired file */
6986 6986
6987 6987 rfs4_do_open(cs, req, oo,
6988 6988 oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
6989 6989 args->share_access, args->share_deny, resp, 0);
6990 6990
6991 6991 /*
6992 6992 * If rfs4_createfile set attrset, we must
6993 6993 * clear this attrset before the response is copied.
6994 6994 */
6995 6995 if (resp->status != NFS4_OK && resp->attrset) {
6996 6996 resp->attrset = 0;
6997 6997 }
6998 6998 }
6999 6999 else
7000 7000 *cs->statusp = resp->status;
7001 7001
7002 7002 if (args->mode == EXCLUSIVE4)
7003 7003 rfs4_enable_delegation();
7004 7004 }
7005 7005
7006 7006 /*ARGSUSED*/
7007 7007 static void
7008 7008 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
7009 7009 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7010 7010 {
7011 7011 change_info4 *cinfo = &resp->cinfo;
7012 7012 vattr_t va;
7013 7013 vtype_t v_type = cs->vp->v_type;
7014 7014 int error = 0;
7015 7015
7016 7016 /* Verify that we have a regular file */
7017 7017 if (v_type != VREG) {
7018 7018 if (v_type == VDIR)
7019 7019 resp->status = NFS4ERR_ISDIR;
7020 7020 else if (v_type == VLNK)
7021 7021 resp->status = NFS4ERR_SYMLINK;
7022 7022 else
7023 7023 resp->status = NFS4ERR_INVAL;
7024 7024 return;
7025 7025 }
7026 7026
7027 7027 va.va_mask = AT_MODE|AT_UID;
7028 7028 error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
7029 7029 if (error) {
7030 7030 resp->status = puterrno4(error);
7031 7031 return;
7032 7032 }
7033 7033
7034 7034 cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7035 7035
7036 7036 /*
7037 7037 * Check if we have access to the file, Note the the file
7038 7038 * could have originally been open UNCHECKED or GUARDED
7039 7039 * with mode bits that will now fail, but there is nothing
7040 7040 * we can really do about that except in the case that the
7041 7041 * owner of the file is the one requesting the open.
7042 7042 */
7043 7043 if (crgetuid(cs->cr) != va.va_uid) {
7044 7044 resp->status = check_open_access(args->share_access, cs, req);
7045 7045 if (resp->status != NFS4_OK) {
7046 7046 return;
7047 7047 }
7048 7048 }
7049 7049
7050 7050 /*
7051 7051 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7052 7052 */
7053 7053 cinfo->before = 0;
7054 7054 cinfo->after = 0;
7055 7055 cinfo->atomic = FALSE;
7056 7056
7057 7057 rfs4_do_open(cs, req, oo,
7058 7058 NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
7059 7059 args->share_access, args->share_deny, resp, 0);
7060 7060 }
7061 7061
7062 7062 static void
7063 7063 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7064 7064 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7065 7065 {
7066 7066 int error;
7067 7067 nfsstat4 status;
7068 7068 stateid4 stateid =
7069 7069 args->open_claim4_u.delegate_cur_info.delegate_stateid;
7070 7070 rfs4_deleg_state_t *dsp;
7071 7071
7072 7072 /*
7073 7073 * Find the state info from the stateid and confirm that the
7074 7074 * file is delegated. If the state openowner is the same as
7075 7075 * the supplied openowner we're done. If not, get the file
7076 7076 * info from the found state info. Use that file info to
7077 7077 * create the state for this lock owner. Note solaris doen't
7078 7078 * really need the pathname to find the file. We may want to
7079 7079 * lookup the pathname and make sure that the vp exist and
7080 7080 * matches the vp in the file structure. However it is
7081 7081 * possible that the pathname nolonger exists (local process
7082 7082 * unlinks the file), so this may not be that useful.
7083 7083 */
7084 7084
7085 7085 status = rfs4_get_deleg_state(&stateid, &dsp);
7086 7086 if (status != NFS4_OK) {
7087 7087 resp->status = status;
7088 7088 return;
7089 7089 }
7090 7090
7091 7091 ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7092 7092
7093 7093 /*
7094 7094 * New lock owner, create state. Since this was probably called
7095 7095 * in response to a CB_RECALL we set deleg to DELEG_NONE
7096 7096 */
7097 7097
7098 7098 ASSERT(cs->vp != NULL);
7099 7099 VN_RELE(cs->vp);
7100 7100 VN_HOLD(dsp->rds_finfo->rf_vp);
7101 7101 cs->vp = dsp->rds_finfo->rf_vp;
7102 7102
7103 7103 if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
7104 7104 rfs4_deleg_state_rele(dsp);
7105 7105 *cs->statusp = resp->status = puterrno4(error);
7106 7106 return;
7107 7107 }
7108 7108
7109 7109 /* Mark progress for delegation returns */
7110 7110 dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7111 7111 rfs4_deleg_state_rele(dsp);
7112 7112 rfs4_do_open(cs, req, oo, DELEG_NONE,
7113 7113 args->share_access, args->share_deny, resp, 1);
7114 7114 }
7115 7115
7116 7116 /*ARGSUSED*/
7117 7117 static void
7118 7118 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7119 7119 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7120 7120 {
7121 7121 /*
7122 7122 * Lookup the pathname, it must already exist since this file
7123 7123 * was delegated.
7124 7124 *
7125 7125 * Find the file and state info for this vp and open owner pair.
7126 7126 * check that they are in fact delegated.
7127 7127 * check that the state access and deny modes are the same.
7128 7128 *
7129 7129 * Return the delgation possibly seting the recall flag.
7130 7130 */
7131 7131 rfs4_file_t *fp;
7132 7132 rfs4_state_t *sp;
7133 7133 bool_t create = FALSE;
7134 7134 bool_t dcreate = FALSE;
7135 7135 rfs4_deleg_state_t *dsp;
7136 7136 nfsace4 *ace;
7137 7137
7138 7138 /* Note we ignore oflags */
7139 7139 resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
7140 7140 req, cs, args->share_access, &resp->cinfo);
7141 7141
7142 7142 if (resp->status != NFS4_OK) {
7143 7143 return;
7144 7144 }
7145 7145
7146 7146 /* get the file struct and hold a lock on it during initial open */
7147 7147 fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7148 7148 if (fp == NULL) {
7149 7149 resp->status = NFS4ERR_RESOURCE;
7150 7150 DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7151 7151 return;
7152 7152 }
7153 7153
7154 7154 sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7155 7155 if (sp == NULL) {
7156 7156 resp->status = NFS4ERR_SERVERFAULT;
7157 7157 DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7158 7158 rw_exit(&fp->rf_file_rwlock);
7159 7159 rfs4_file_rele(fp);
7160 7160 return;
7161 7161 }
7162 7162
7163 7163 rfs4_dbe_lock(sp->rs_dbe);
7164 7164 rfs4_dbe_lock(fp->rf_dbe);
7165 7165 if (args->share_access != sp->rs_share_access ||
7166 7166 args->share_deny != sp->rs_share_deny ||
7167 7167 sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7168 7168 NFS4_DEBUG(rfs4_debug,
7169 7169 (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7170 7170 rfs4_dbe_unlock(fp->rf_dbe);
7171 7171 rfs4_dbe_unlock(sp->rs_dbe);
7172 7172 rfs4_file_rele(fp);
7173 7173 rfs4_state_rele(sp);
7174 7174 resp->status = NFS4ERR_SERVERFAULT;
7175 7175 return;
7176 7176 }
7177 7177 rfs4_dbe_unlock(fp->rf_dbe);
7178 7178 rfs4_dbe_unlock(sp->rs_dbe);
7179 7179
7180 7180 dsp = rfs4_finddeleg(sp, &dcreate);
7181 7181 if (dsp == NULL) {
7182 7182 rfs4_state_rele(sp);
7183 7183 rfs4_file_rele(fp);
7184 7184 resp->status = NFS4ERR_SERVERFAULT;
7185 7185 return;
7186 7186 }
7187 7187
7188 7188 next_stateid(&sp->rs_stateid);
7189 7189
7190 7190 resp->stateid = sp->rs_stateid.stateid;
7191 7191
7192 7192 resp->delegation.delegation_type = dsp->rds_dtype;
7193 7193
7194 7194 if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7195 7195 open_read_delegation4 *rv =
7196 7196 &resp->delegation.open_delegation4_u.read;
7197 7197
7198 7198 rv->stateid = dsp->rds_delegid.stateid;
7199 7199 rv->recall = FALSE; /* no policy in place to set to TRUE */
7200 7200 ace = &rv->permissions;
7201 7201 } else {
7202 7202 open_write_delegation4 *rv =
7203 7203 &resp->delegation.open_delegation4_u.write;
7204 7204
7205 7205 rv->stateid = dsp->rds_delegid.stateid;
7206 7206 rv->recall = FALSE; /* no policy in place to set to TRUE */
7207 7207 ace = &rv->permissions;
7208 7208 rv->space_limit.limitby = NFS_LIMIT_SIZE;
7209 7209 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7210 7210 }
7211 7211
7212 7212 /* XXX For now */
7213 7213 ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7214 7214 ace->flag = 0;
7215 7215 ace->access_mask = 0;
7216 7216 ace->who.utf8string_len = 0;
7217 7217 ace->who.utf8string_val = 0;
7218 7218
7219 7219 rfs4_deleg_state_rele(dsp);
7220 7220 rfs4_state_rele(sp);
7221 7221 rfs4_file_rele(fp);
7222 7222 }
7223 7223
/*
 * Result of a sequence id check (see rfs4_check_seqid()).
 */
typedef enum {
	NFS4_CHKSEQ_OKAY = 0,	/* request carries the next expected seqid */
	NFS4_CHKSEQ_REPLAY = 1,	/* same seqid and same operation as last */
	NFS4_CHKSEQ_BAD = 2	/* anything else */
} rfs4_chkseq_t;
7229 7229
7230 7230 /*
7231 7231 * Generic function for sequence number checks.
7232 7232 */
7233 7233 static rfs4_chkseq_t
7234 7234 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7235 7235 seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7236 7236 {
7237 7237 /* Same sequence ids and matching operations? */
7238 7238 if (seqid == rqst_seq && resop->resop == lastop->resop) {
7239 7239 if (copyres == TRUE) {
7240 7240 rfs4_free_reply(resop);
7241 7241 rfs4_copy_reply(resop, lastop);
7242 7242 }
7243 7243 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7244 7244 "Replayed SEQID %d\n", seqid));
7245 7245 return (NFS4_CHKSEQ_REPLAY);
7246 7246 }
7247 7247
7248 7248 /* If the incoming sequence is not the next expected then it is bad */
7249 7249 if (rqst_seq != seqid + 1) {
7250 7250 if (rqst_seq == seqid) {
7251 7251 NFS4_DEBUG(rfs4_debug,
7252 7252 (CE_NOTE, "BAD SEQID: Replayed sequence id "
7253 7253 "but last op was %d current op is %d\n",
7254 7254 lastop->resop, resop->resop));
7255 7255 return (NFS4_CHKSEQ_BAD);
7256 7256 }
7257 7257 NFS4_DEBUG(rfs4_debug,
7258 7258 (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7259 7259 rqst_seq, seqid));
7260 7260 return (NFS4_CHKSEQ_BAD);
7261 7261 }
7262 7262
7263 7263 /* Everything okay -- next expected */
7264 7264 return (NFS4_CHKSEQ_OKAY);
7265 7265 }
7266 7266
7267 7267
7268 7268 static rfs4_chkseq_t
7269 7269 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7270 7270 {
7271 7271 rfs4_chkseq_t rc;
7272 7272
7273 7273 rfs4_dbe_lock(op->ro_dbe);
7274 7274 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7275 7275 TRUE);
7276 7276 rfs4_dbe_unlock(op->ro_dbe);
7277 7277
7278 7278 if (rc == NFS4_CHKSEQ_OKAY)
7279 7279 rfs4_update_lease(op->ro_client);
7280 7280
7281 7281 return (rc);
7282 7282 }
7283 7283
7284 7284 static rfs4_chkseq_t
7285 7285 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7286 7286 {
7287 7287 rfs4_chkseq_t rc;
7288 7288
7289 7289 rfs4_dbe_lock(op->ro_dbe);
7290 7290 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7291 7291 olo_seqid, resop, FALSE);
7292 7292 rfs4_dbe_unlock(op->ro_dbe);
7293 7293
7294 7294 return (rc);
7295 7295 }
7296 7296
7297 7297 static rfs4_chkseq_t
7298 7298 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7299 7299 {
7300 7300 rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7301 7301
7302 7302 rfs4_dbe_lock(lsp->rls_dbe);
7303 7303 if (!lsp->rls_skip_seqid_check)
7304 7304 rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7305 7305 resop, TRUE);
7306 7306 rfs4_dbe_unlock(lsp->rls_dbe);
7307 7307
7308 7308 return (rc);
7309 7309 }
7310 7310
/*
 * rfs4_op_open: service an OPEN operation (arguments in
 * argop->nfs_argop4_u.opopen, result in resop->nfs_resop4_u.opopen).
 *
 * Steps, in order:
 *   - require a current filehandle (cs->vp), a known client id and an
 *     unexpired lease;
 *   - find the open owner with rfs4_findopenowner() and enter its ro_sw so
 *     the sequence space is held while the open is done;
 *   - for an owner that already existed, check the request seqid; an
 *     unconfirmed owner has its opens freed and the lookup is retried;
 *   - apply the grace-period rules for the claim type and validate
 *     share_access / share_deny;
 *   - dispatch on args->claim to rfs4_do_opennull(), rfs4_do_openprev(),
 *     rfs4_do_opendelcur() or rfs4_do_opendelprev();
 *   - depending on resp->status, save the reply, restore the filehandle
 *     for a replay, and advance the owner's open sequence id.
 *
 *	argop	operation arguments
 *	resop	operation result
 *	req	RPC request, passed through to the claim handlers
 *	cs	compound state; *cs->statusp receives the final status
 *
 * Exit labels: "out" releases the client reference and runs the sequence
 * id bookkeeping; "finish" stores the status and releases the open owner;
 * "end" only fires the op__open__done probe.
 */
7311 7311 static void
7312 7312 rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
7313 7313 struct svc_req *req, struct compound_state *cs)
7314 7314 {
7315 7315 OPEN4args *args = &argop->nfs_argop4_u.opopen;
7316 7316 OPEN4res *resp = &resop->nfs_resop4_u.opopen;
7317 7317 open_owner4 *owner = &args->owner;
7318 7318 open_claim_type4 claim = args->claim;
7319 7319 rfs4_client_t *cp;
7320 7320 rfs4_openowner_t *oo;
7321 7321 bool_t create;
7322 7322 bool_t replay = FALSE;
7323 7323 int can_reclaim;
7324 7324
7325 7325 DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
7326 7326 OPEN4args *, args);
7327 7327
7328 7328 if (cs->vp == NULL) {
7329 7329 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7330 7330 goto end;
7331 7331 }
7332 7332
7333 7333 /*
7334 7334 * Need to check clientid and lease expiration first based on
7335 7335 * error ordering and incrementing sequence id.
7336 7336 */
7337 7337 cp = rfs4_findclient_by_id(owner->clientid, FALSE);
7338 7338 if (cp == NULL) {
7339 7339 *cs->statusp = resp->status =
7340 7340 rfs4_check_clientid(&owner->clientid, 0);
7341 7341 goto end;
7342 7342 }
7343 7343
7344 7344 if (rfs4_lease_expired(cp)) {
7345 7345 rfs4_client_close(cp);
7346 7346 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7347 7347 goto end;
7348 7348 }
7349 7349 can_reclaim = cp->rc_can_reclaim;
7350 7350
7351 7351 /*
7352 7352 * Find the open_owner for use from this point forward. Take
7353 7353 * care in updating the sequence id based on the type of error
7354 7354 * being returned.
7355 7355 */
7356 7356 retry:
/*
 * create is reset on every pass.  Below, !create is treated as "the
 * open_owner already existed"; presumably rfs4_findopenowner() clears it
 * when it returns an existing owner -- confirm against that function.
 */
7357 7357 create = TRUE;
7358 7358 oo = rfs4_findopenowner(owner, &create, args->seqid);
7359 7359 if (oo == NULL) {
7360 7360 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
7361 7361 rfs4_client_rele(cp);
7362 7362 goto end;
7363 7363 }
7364 7364
7365 7365 /* Hold off access to the sequence space while the open is done */
7366 7366 rfs4_sw_enter(&oo->ro_sw);
7367 7367
7368 7368 /*
7369 7369 * If the open_owner existed before at the server, then check
7370 7370 * the sequence id.
7371 7371 */
7372 7372 if (!create && !oo->ro_postpone_confirm) {
7373 7373 switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
7374 7374 case NFS4_CHKSEQ_BAD:
7375 7375 if ((args->seqid > oo->ro_open_seqid) &&
7376 7376 oo->ro_need_confirm) {
7377 7377 rfs4_free_opens(oo, TRUE, FALSE);
7378 7378 rfs4_sw_exit(&oo->ro_sw);
7379 7379 rfs4_openowner_rele(oo);
7380 7380 goto retry;
7381 7381 }
7382 7382 resp->status = NFS4ERR_BAD_SEQID;
7383 7383 goto out;
7384 7384 case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
7385 7385 replay = TRUE;
7386 7386 goto out;
7387 7387 default:
7388 7388 break;
7389 7389 }
7390 7390
7391 7391 /*
7392 7392 * Sequence was ok and open owner exists
7393 7393 * check to see if we have yet to see an
7394 7394 * open_confirm.
7395 7395 */
7396 7396 if (oo->ro_need_confirm) {
7397 7397 rfs4_free_opens(oo, TRUE, FALSE);
7398 7398 rfs4_sw_exit(&oo->ro_sw);
7399 7399 rfs4_openowner_rele(oo);
7400 7400 goto retry;
7401 7401 }
7402 7402 }
7403 7403 /* Grace only applies to regular-type OPENs */
7404 7404 if (rfs4_clnt_in_grace(cp) &&
7405 7405 (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
7406 7406 *cs->statusp = resp->status = NFS4ERR_GRACE;
7407 7407 goto out;
7408 7408 }
7409 7409
7410 7410 /*
7411 7411 * If previous state at the server existed then can_reclaim
7412 7412 * will be set. If not reply NFS4ERR_NO_GRACE to the
7413 7413 * client.
7414 7414 */
7415 7415 if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
7416 7416 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7417 7417 goto out;
7418 7418 }
7419 7419
7420 7420
7421 7421 /*
7422 7422 * Reject the open if the client has missed the grace period
7423 7423 */
7424 7424 if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
7425 7425 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7426 7426 goto out;
7427 7427 }
7428 7428
7429 7429 /* Couple of up-front bookkeeping items */
7430 7430 if (oo->ro_need_confirm) {
7431 7431 /*
7432 7432 * If this is a reclaim OPEN then we should not ask
7433 7433 * for a confirmation of the open_owner per the
7434 7434 * protocol specification.
7435 7435 */
7436 7436 if (claim == CLAIM_PREVIOUS)
7437 7437 oo->ro_need_confirm = FALSE;
7438 7438 else
7439 7439 resp->rflags |= OPEN4_RESULT_CONFIRM;
7440 7440 }
7441 7441 resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;
7442 7442
7443 7443 /*
7444 7444 * If there is an unshared filesystem mounted on this vnode,
7445 7445 * do not allow to open/create in this directory.
7446 7446 */
7447 7447 if (vn_ismntpt(cs->vp)) {
7448 7448 *cs->statusp = resp->status = NFS4ERR_ACCESS;
7449 7449 goto out;
7450 7450 }
7451 7451
7452 7452 /*
7453 7453 * access must READ, WRITE, or BOTH. No access is invalid.
7454 7454 * deny can be READ, WRITE, BOTH, or NONE.
7455 7455 * bits not defined for access/deny are invalid.
7456 7456 */
7457 7457 if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
7458 7458 (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
7459 7459 (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
7460 7460 *cs->statusp = resp->status = NFS4ERR_INVAL;
7461 7461 goto out;
7462 7462 }
7463 7463
7464 7464
7465 7465 /*
7466 7466 * make sure attrset is zero before response is built.
7467 7467 */
7468 7468 resp->attrset = 0;
7469 7469
7470 7470 switch (claim) {
7471 7471 case CLAIM_NULL:
7472 7472 rfs4_do_opennull(cs, req, args, oo, resp);
7473 7473 break;
7474 7474 case CLAIM_PREVIOUS:
7475 7475 rfs4_do_openprev(cs, req, args, oo, resp);
7476 7476 break;
7477 7477 case CLAIM_DELEGATE_CUR:
7478 7478 rfs4_do_opendelcur(cs, req, args, oo, resp);
7479 7479 break;
7480 7480 case CLAIM_DELEGATE_PREV:
7481 7481 rfs4_do_opendelprev(cs, req, args, oo, resp);
7482 7482 break;
7483 7483 default:
7484 7484 resp->status = NFS4ERR_INVAL;
7485 7485 break;
7486 7486 }
7487 7487
7488 7488 out:
7489 7489 rfs4_client_rele(cp);
7490 7490
7491 7491 /* Catch sequence id handling here to make it a little easier */
7492 7492 switch (resp->status) {
7493 7493 case NFS4ERR_BADXDR:
7494 7494 case NFS4ERR_BAD_SEQID:
7495 7495 case NFS4ERR_BAD_STATEID:
7496 7496 case NFS4ERR_NOFILEHANDLE:
7497 7497 case NFS4ERR_RESOURCE:
7498 7498 case NFS4ERR_STALE_CLIENTID:
7499 7499 case NFS4ERR_STALE_STATEID:
7500 7500 /*
7501 7501 * The protocol states that if any of these errors are
7502 7502 * being returned, the sequence id should not be
7503 7503 * incremented. Any other return requires an
7504 7504 * increment.
7505 7505 */
7506 7506 break;
7507 7507 default:
7508 7508 /* Always update the lease in this case */
7509 7509 rfs4_update_lease(oo->ro_client);
7510 7510
7511 7511 /* Regular response - copy the result */
7512 7512 if (!replay)
7513 7513 rfs4_update_open_resp(oo, resop, &cs->fh);
7514 7514
7515 7515 /*
7516 7516 * REPLAY case: Only if the previous response was OK
7517 7517 * do we copy the filehandle. If not OK, no
7518 7518 * filehandle to copy.
7519 7519 */
7520 7520 if (replay == TRUE &&
7521 7521 resp->status == NFS4_OK &&
7522 7522 oo->ro_reply_fh.nfs_fh4_val) {
7523 7523 /*
7524 7524 * If this is a replay, we must restore the
7525 7525 * current filehandle/vp to that of what was
7526 7526 * returned originally. Try our best to do
7527 7527 * it.
7528 7528 */
7529 7529 nfs_fh4_fmt_t *fh_fmtp =
7530 7530 (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;
7531 7531
7532 7532 cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
7533 7533 (fid_t *)&fh_fmtp->fh4_xlen, NULL);
7534 7534
7535 7535 if (cs->exi == NULL) {
7536 7536 resp->status = NFS4ERR_STALE;
7537 7537 goto finish;
7538 7538 }
7539 7539
7540 7540 VN_RELE(cs->vp);
7541 7541
7542 7542 cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
7543 7543 &resp->status);
7544 7544
7545 7545 if (cs->vp == NULL)
7546 7546 goto finish;
7547 7547
7548 7548 nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
7549 7549 }
7550 7550
7551 7551 /*
7552 7552 * If this was a replay, no need to update the
7553 7553 * sequence id. If the open_owner was not created on
7554 7554 * this pass, then update. The first use of an
7555 7555 * open_owner will not bump the sequence id.
7556 7556 */
7557 7557 if (replay == FALSE && !create)
7558 7558 rfs4_update_open_sequence(oo);
7559 7559 /*
7560 7560 * If the client is receiving an error and the
7561 7561 * open_owner needs to be confirmed, there is no way
7562 7562 * to notify the client of this fact ignoring the fact
7563 7563 * that the server has no method of returning a
7564 7564 * stateid to confirm. Therefore, the server needs to
7565 7565 * mark this open_owner in a way as to avoid the
7566 7566 * sequence id checking the next time the client uses
7567 7567 * this open_owner.
7568 7568 */
7569 7569 if (resp->status != NFS4_OK && oo->ro_need_confirm)
7570 7570 oo->ro_postpone_confirm = TRUE;
7571 7571 /*
7572 7572 * If OK response then clear the postpone flag and
7573 7573 * reset the sequence id to keep in sync with the
7574 7574 * client.
7575 7575 */
7576 7576 if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
7577 7577 oo->ro_postpone_confirm = FALSE;
7578 7578 oo->ro_open_seqid = args->seqid;
7579 7579 }
7580 7580 break;
7581 7581 }
7582 7582
7583 7583 finish:
7584 7584 *cs->statusp = resp->status;
7585 7585
7586 7586 rfs4_sw_exit(&oo->ro_sw);
7587 7587 rfs4_openowner_rele(oo);
7588 7588
7589 7589 end:
7590 7590 DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
7591 7591 OPEN4res *, resp);
7592 7592 }
7593 7593
7594 7594 /*ARGSUSED*/
7595 7595 void
7596 7596 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7597 7597 struct svc_req *req, struct compound_state *cs)
7598 7598 {
7599 7599 OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7600 7600 OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7601 7601 rfs4_state_t *sp;
7602 7602 nfsstat4 status;
7603 7603
7604 7604 DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7605 7605 OPEN_CONFIRM4args *, args);
7606 7606
7607 7607 if (cs->vp == NULL) {
7608 7608 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7609 7609 goto out;
7610 7610 }
7611 7611
7612 7612 if (cs->vp->v_type != VREG) {
7613 7613 *cs->statusp = resp->status =
7614 7614 cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7615 7615 return;
7616 7616 }
7617 7617
7618 7618 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7619 7619 if (status != NFS4_OK) {
7620 7620 *cs->statusp = resp->status = status;
7621 7621 goto out;
7622 7622 }
7623 7623
7624 7624 /* Ensure specified filehandle matches */
7625 7625 if (cs->vp != sp->rs_finfo->rf_vp) {
7626 7626 rfs4_state_rele(sp);
7627 7627 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7628 7628 goto out;
7629 7629 }
7630 7630
7631 7631 /* hold off other access to open_owner while we tinker */
7632 7632 rfs4_sw_enter(&sp->rs_owner->ro_sw);
7633 7633
7634 7634 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7635 7635 case NFS4_CHECK_STATEID_OKAY:
7636 7636 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7637 7637 resop) != 0) {
7638 7638 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7639 7639 break;
7640 7640 }
7641 7641 /*
7642 7642 * If it is the appropriate stateid and determined to
7643 7643 * be "OKAY" then this means that the stateid does not
7644 7644 * need to be confirmed and the client is in error for
7645 7645 * sending an OPEN_CONFIRM.
7646 7646 */
7647 7647 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7648 7648 break;
7649 7649 case NFS4_CHECK_STATEID_OLD:
7650 7650 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7651 7651 break;
7652 7652 case NFS4_CHECK_STATEID_BAD:
7653 7653 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7654 7654 break;
7655 7655 case NFS4_CHECK_STATEID_EXPIRED:
7656 7656 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7657 7657 break;
7658 7658 case NFS4_CHECK_STATEID_CLOSED:
7659 7659 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7660 7660 break;
7661 7661 case NFS4_CHECK_STATEID_REPLAY:
7662 7662 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7663 7663 resop)) {
7664 7664 case NFS4_CHKSEQ_OKAY:
7665 7665 /*
7666 7666 * This is replayed stateid; if seqid matches
7667 7667 * next expected, then client is using wrong seqid.
7668 7668 */
7669 7669 /* fall through */
7670 7670 case NFS4_CHKSEQ_BAD:
7671 7671 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7672 7672 break;
7673 7673 case NFS4_CHKSEQ_REPLAY:
7674 7674 /*
7675 7675 * Note this case is the duplicate case so
7676 7676 * resp->status is already set.
7677 7677 */
7678 7678 *cs->statusp = resp->status;
7679 7679 rfs4_update_lease(sp->rs_owner->ro_client);
7680 7680 break;
7681 7681 }
7682 7682 break;
7683 7683 case NFS4_CHECK_STATEID_UNCONFIRMED:
7684 7684 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7685 7685 resop) != NFS4_CHKSEQ_OKAY) {
7686 7686 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7687 7687 break;
7688 7688 }
7689 7689 *cs->statusp = resp->status = NFS4_OK;
7690 7690
7691 7691 next_stateid(&sp->rs_stateid);
7692 7692 resp->open_stateid = sp->rs_stateid.stateid;
7693 7693 sp->rs_owner->ro_need_confirm = FALSE;
7694 7694 rfs4_update_lease(sp->rs_owner->ro_client);
7695 7695 rfs4_update_open_sequence(sp->rs_owner);
7696 7696 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7697 7697 break;
7698 7698 default:
7699 7699 ASSERT(FALSE);
7700 7700 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7701 7701 break;
7702 7702 }
7703 7703 rfs4_sw_exit(&sp->rs_owner->ro_sw);
7704 7704 rfs4_state_rele(sp);
7705 7705
7706 7706 out:
7707 7707 DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7708 7708 OPEN_CONFIRM4res *, resp);
7709 7709 }
7710 7710
7711 7711 /*ARGSUSED*/
7712 7712 void
7713 7713 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7714 7714 struct svc_req *req, struct compound_state *cs)
7715 7715 {
7716 7716 OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7717 7717 OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7718 7718 uint32_t access = args->share_access;
7719 7719 uint32_t deny = args->share_deny;
7720 7720 nfsstat4 status;
7721 7721 rfs4_state_t *sp;
7722 7722 rfs4_file_t *fp;
7723 7723 int fflags = 0;
7724 7724
7725 7725 DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7726 7726 OPEN_DOWNGRADE4args *, args);
7727 7727
7728 7728 if (cs->vp == NULL) {
7729 7729 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7730 7730 goto out;
7731 7731 }
7732 7732
7733 7733 if (cs->vp->v_type != VREG) {
7734 7734 *cs->statusp = resp->status = NFS4ERR_INVAL;
7735 7735 return;
7736 7736 }
7737 7737
7738 7738 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7739 7739 if (status != NFS4_OK) {
7740 7740 *cs->statusp = resp->status = status;
7741 7741 goto out;
7742 7742 }
7743 7743
7744 7744 /* Ensure specified filehandle matches */
7745 7745 if (cs->vp != sp->rs_finfo->rf_vp) {
7746 7746 rfs4_state_rele(sp);
7747 7747 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7748 7748 goto out;
7749 7749 }
7750 7750
7751 7751 /* hold off other access to open_owner while we tinker */
7752 7752 rfs4_sw_enter(&sp->rs_owner->ro_sw);
7753 7753
7754 7754 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7755 7755 case NFS4_CHECK_STATEID_OKAY:
7756 7756 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7757 7757 resop) != NFS4_CHKSEQ_OKAY) {
7758 7758 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7759 7759 goto end;
7760 7760 }
7761 7761 break;
7762 7762 case NFS4_CHECK_STATEID_OLD:
7763 7763 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7764 7764 goto end;
7765 7765 case NFS4_CHECK_STATEID_BAD:
7766 7766 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7767 7767 goto end;
7768 7768 case NFS4_CHECK_STATEID_EXPIRED:
7769 7769 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7770 7770 goto end;
7771 7771 case NFS4_CHECK_STATEID_CLOSED:
7772 7772 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7773 7773 goto end;
7774 7774 case NFS4_CHECK_STATEID_UNCONFIRMED:
7775 7775 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7776 7776 goto end;
7777 7777 case NFS4_CHECK_STATEID_REPLAY:
7778 7778 /* Check the sequence id for the open owner */
7779 7779 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7780 7780 resop)) {
7781 7781 case NFS4_CHKSEQ_OKAY:
7782 7782 /*
7783 7783 * This is replayed stateid; if seqid matches
7784 7784 * next expected, then client is using wrong seqid.
7785 7785 */
7786 7786 /* fall through */
7787 7787 case NFS4_CHKSEQ_BAD:
7788 7788 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7789 7789 goto end;
7790 7790 case NFS4_CHKSEQ_REPLAY:
7791 7791 /*
7792 7792 * Note this case is the duplicate case so
7793 7793 * resp->status is already set.
7794 7794 */
7795 7795 *cs->statusp = resp->status;
7796 7796 rfs4_update_lease(sp->rs_owner->ro_client);
7797 7797 goto end;
7798 7798 }
7799 7799 break;
7800 7800 default:
7801 7801 ASSERT(FALSE);
7802 7802 break;
7803 7803 }
7804 7804
7805 7805 rfs4_dbe_lock(sp->rs_dbe);
7806 7806 /*
7807 7807 * Check that the new access modes and deny modes are valid.
7808 7808 * Check that no invalid bits are set.
7809 7809 */
7810 7810 if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
7811 7811 (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
7812 7812 *cs->statusp = resp->status = NFS4ERR_INVAL;
7813 7813 rfs4_update_open_sequence(sp->rs_owner);
7814 7814 rfs4_dbe_unlock(sp->rs_dbe);
7815 7815 goto end;
7816 7816 }
7817 7817
7818 7818 /*
7819 7819 * The new modes must be a subset of the current modes and
7820 7820 * the access must specify at least one mode. To test that
7821 7821 * the new mode is a subset of the current modes we bitwise
7822 7822 * AND them together and check that the result equals the new
7823 7823 * mode. For example:
7824 7824 * New mode, access == R and current mode, sp->rs_open_access == RW
7825 7825 * access & sp->rs_open_access == R == access, so the new access mode
7826 7826 * is valid. Consider access == RW, sp->rs_open_access = R
7827 7827 * access & sp->rs_open_access == R != access, so the new access mode
7828 7828 * is invalid.
7829 7829 */
7830 7830 if ((access & sp->rs_open_access) != access ||
7831 7831 (deny & sp->rs_open_deny) != deny ||
7832 7832 (access &
7833 7833 (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
7834 7834 *cs->statusp = resp->status = NFS4ERR_INVAL;
7835 7835 rfs4_update_open_sequence(sp->rs_owner);
7836 7836 rfs4_dbe_unlock(sp->rs_dbe);
7837 7837 goto end;
7838 7838 }
7839 7839
7840 7840 /*
7841 7841 * Release any share locks associated with this stateID.
7842 7842 * Strictly speaking, this violates the spec because the
7843 7843 * spec effectively requires that open downgrade be atomic.
7844 7844 * At present, fs_shrlock does not have this capability.
7845 7845 */
7846 7846 (void) rfs4_unshare(sp);
7847 7847
7848 7848 status = rfs4_share(sp, access, deny);
7849 7849 if (status != NFS4_OK) {
7850 7850 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7851 7851 rfs4_update_open_sequence(sp->rs_owner);
7852 7852 rfs4_dbe_unlock(sp->rs_dbe);
7853 7853 goto end;
7854 7854 }
7855 7855
7856 7856 fp = sp->rs_finfo;
7857 7857 rfs4_dbe_lock(fp->rf_dbe);
7858 7858
7859 7859 /*
7860 7860 * If the current mode has deny read and the new mode
7861 7861 * does not, decrement the number of deny read mode bits
7862 7862 * and if it goes to zero turn off the deny read bit
7863 7863 * on the file.
7864 7864 */
7865 7865 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
7866 7866 (deny & OPEN4_SHARE_DENY_READ) == 0) {
7867 7867 fp->rf_deny_read--;
7868 7868 if (fp->rf_deny_read == 0)
7869 7869 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
7870 7870 }
7871 7871
7872 7872 /*
7873 7873 * If the current mode has deny write and the new mode
7874 7874 * does not, decrement the number of deny write mode bits
7875 7875 * and if it goes to zero turn off the deny write bit
7876 7876 * on the file.
7877 7877 */
7878 7878 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
7879 7879 (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
7880 7880 fp->rf_deny_write--;
7881 7881 if (fp->rf_deny_write == 0)
7882 7882 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7883 7883 }
7884 7884
7885 7885 /*
7886 7886 * If the current mode has access read and the new mode
7887 7887 * does not, decrement the number of access read mode bits
7888 7888 * and if it goes to zero turn off the access read bit
7889 7889 * on the file. set fflags to FREAD for the call to
7890 7890 * vn_open_downgrade().
7891 7891 */
7892 7892 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
7893 7893 (access & OPEN4_SHARE_ACCESS_READ) == 0) {
7894 7894 fp->rf_access_read--;
7895 7895 if (fp->rf_access_read == 0)
7896 7896 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
7897 7897 fflags |= FREAD;
7898 7898 }
7899 7899
7900 7900 /*
7901 7901 * If the current mode has access write and the new mode
7902 7902 * does not, decrement the number of access write mode bits
7903 7903 * and if it goes to zero turn off the access write bit
7904 7904 * on the file. set fflags to FWRITE for the call to
7905 7905 * vn_open_downgrade().
7906 7906 */
7907 7907 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
7908 7908 (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7909 7909 fp->rf_access_write--;
7910 7910 if (fp->rf_access_write == 0)
7911 7911 fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
7912 7912 fflags |= FWRITE;
7913 7913 }
7914 7914
7915 7915 /* Check that the file is still accessible */
7916 7916 ASSERT(fp->rf_share_access);
7917 7917
7918 7918 rfs4_dbe_unlock(fp->rf_dbe);
7919 7919
7920 7920 /* now set the new open access and deny modes */
7921 7921 sp->rs_open_access = access;
7922 7922 sp->rs_open_deny = deny;
7923 7923
7924 7924 /*
7925 7925 * we successfully downgraded the share lock, now we need to downgrade
7926 7926 * the open. it is possible that the downgrade was only for a deny
7927 7927 * mode and we have nothing else to do.
7928 7928 */
7929 7929 if ((fflags & (FREAD|FWRITE)) != 0)
7930 7930 vn_open_downgrade(cs->vp, fflags);
7931 7931
7932 7932 /* Update the stateid */
7933 7933 next_stateid(&sp->rs_stateid);
7934 7934 resp->open_stateid = sp->rs_stateid.stateid;
7935 7935
7936 7936 rfs4_dbe_unlock(sp->rs_dbe);
7937 7937
7938 7938 *cs->statusp = resp->status = NFS4_OK;
7939 7939 /* Update the lease */
7940 7940 rfs4_update_lease(sp->rs_owner->ro_client);
7941 7941 /* And the sequence */
7942 7942 rfs4_update_open_sequence(sp->rs_owner);
7943 7943 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7944 7944
7945 7945 end:
7946 7946 rfs4_sw_exit(&sp->rs_owner->ro_sw);
7947 7947 rfs4_state_rele(sp);
7948 7948 out:
7949 7949 DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
7950 7950 OPEN_DOWNGRADE4res *, resp);
7951 7951 }
7952 7952
/*
 * Search the first n bytes of the buffer s1 for the NUL-terminated string
 * s2 (the terminator itself is not compared).  Returns a pointer to the
 * first match inside s1, or NULL when s2 does not occur within n bytes.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
	const size_t needle_len = strlen(s2);
	char *cursor;
	size_t remaining;

	/* Stop as soon as fewer bytes remain than the needle needs */
	for (cursor = (char *)s1, remaining = n; remaining >= needle_len;
	    cursor++, remaining--) {
		if (bcmp(cursor, s2, needle_len) == 0)
			return (cursor);
	}

	return (NULL);
}
7968 7968
7969 7969 /*
7970 7970 * The logic behind this function is detailed in the NFSv4 RFC in the
7971 7971 * SETCLIENTID operation description under IMPLEMENTATION. Refer to
7972 7972 * that section for explicit guidance to server behavior for
7973 7973 * SETCLIENTID.
7974 7974 */
7975 7975 void
7976 7976 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
7977 7977 struct svc_req *req, struct compound_state *cs)
7978 7978 {
7979 7979 SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
7980 7980 SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
7981 7981 rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
7982 7982 rfs4_clntip_t *ci;
7983 7983 bool_t create;
7984 7984 char *addr, *netid;
7985 7985 int len;
7986 7986
7987 7987 DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
7988 7988 SETCLIENTID4args *, args);
7989 7989 retry:
7990 7990 newcp = cp_confirmed = cp_unconfirmed = NULL;
7991 7991
7992 7992 /*
7993 7993 * Save the caller's IP address
7994 7994 */
7995 7995 args->client.cl_addr =
7996 7996 (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
7997 7997
7998 7998 /*
7999 7999 * Record if it is a Solaris client that cannot handle referrals.
8000 8000 */
8001 8001 if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
8002 8002 !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
8003 8003 /* Add a "yes, it's downrev" record */
8004 8004 create = TRUE;
8005 8005 ci = rfs4_find_clntip(args->client.cl_addr, &create);
8006 8006 ASSERT(ci != NULL);
8007 8007 rfs4_dbe_rele(ci->ri_dbe);
8008 8008 } else {
8009 8009 /* Remove any previous record */
8010 8010 rfs4_invalidate_clntip(args->client.cl_addr);
8011 8011 }
8012 8012
8013 8013 /*
8014 8014 * In search of an EXISTING client matching the incoming
8015 8015 * request to establish a new client identifier at the server
8016 8016 */
8017 8017 create = TRUE;
8018 8018 cp = rfs4_findclient(&args->client, &create, NULL);
8019 8019
8020 8020 /* Should never happen */
8021 8021 ASSERT(cp != NULL);
8022 8022
8023 8023 if (cp == NULL) {
8024 8024 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8025 8025 goto out;
8026 8026 }
8027 8027
8028 8028 /*
8029 8029 * Easiest case. Client identifier is newly created and is
8030 8030 * unconfirmed. Also note that for this case, no other
8031 8031 * entries exist for the client identifier. Nothing else to
8032 8032 * check. Just setup the response and respond.
8033 8033 */
8034 8034 if (create) {
8035 8035 *cs->statusp = res->status = NFS4_OK;
8036 8036 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
8037 8037 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8038 8038 cp->rc_confirm_verf;
8039 8039 /* Setup callback information; CB_NULL confirmation later */
8040 8040 rfs4_client_setcb(cp, &args->callback, args->callback_ident);
8041 8041
8042 8042 rfs4_client_rele(cp);
8043 8043 goto out;
8044 8044 }
8045 8045
8046 8046 /*
8047 8047 * An existing, confirmed client may exist but it may not have
8048 8048 * been active for at least one lease period. If so, then
8049 8049 * "close" the client and create a new client identifier
8050 8050 */
8051 8051 if (rfs4_lease_expired(cp)) {
8052 8052 rfs4_client_close(cp);
8053 8053 goto retry;
8054 8054 }
8055 8055
8056 8056 if (cp->rc_need_confirm == TRUE)
8057 8057 cp_unconfirmed = cp;
8058 8058 else
8059 8059 cp_confirmed = cp;
8060 8060
8061 8061 cp = NULL;
8062 8062
8063 8063 /*
8064 8064 * We have a confirmed client, now check for an
8065 8065 * unconfimred entry
8066 8066 */
8067 8067 if (cp_confirmed) {
8068 8068 /* If creds don't match then client identifier is inuse */
8069 8069 if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
8070 8070 rfs4_cbinfo_t *cbp;
8071 8071 /*
8072 8072 * Some one else has established this client
8073 8073 * id. Try and say * who they are. We will use
8074 8074 * the call back address supplied by * the
8075 8075 * first client.
8076 8076 */
8077 8077 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8078 8078
8079 8079 addr = netid = NULL;
8080 8080
8081 8081 cbp = &cp_confirmed->rc_cbinfo;
8082 8082 if (cbp->cb_callback.cb_location.r_addr &&
8083 8083 cbp->cb_callback.cb_location.r_netid) {
8084 8084 cb_client4 *cbcp = &cbp->cb_callback;
8085 8085
8086 8086 len = strlen(cbcp->cb_location.r_addr)+1;
8087 8087 addr = kmem_alloc(len, KM_SLEEP);
8088 8088 bcopy(cbcp->cb_location.r_addr, addr, len);
8089 8089 len = strlen(cbcp->cb_location.r_netid)+1;
8090 8090 netid = kmem_alloc(len, KM_SLEEP);
8091 8091 bcopy(cbcp->cb_location.r_netid, netid, len);
8092 8092 }
8093 8093
8094 8094 res->SETCLIENTID4res_u.client_using.r_addr = addr;
8095 8095 res->SETCLIENTID4res_u.client_using.r_netid = netid;
8096 8096
8097 8097 rfs4_client_rele(cp_confirmed);
8098 8098 }
8099 8099
8100 8100 /*
8101 8101 * Confirmed, creds match, and verifier matches; must
8102 8102 * be an update of the callback info
8103 8103 */
8104 8104 if (cp_confirmed->rc_nfs_client.verifier ==
8105 8105 args->client.verifier) {
8106 8106 /* Setup callback information */
8107 8107 rfs4_client_setcb(cp_confirmed, &args->callback,
8108 8108 args->callback_ident);
8109 8109
8110 8110 /* everything okay -- move ahead */
8111 8111 *cs->statusp = res->status = NFS4_OK;
8112 8112 res->SETCLIENTID4res_u.resok4.clientid =
8113 8113 cp_confirmed->rc_clientid;
8114 8114
8115 8115 /* update the confirm_verifier and return it */
8116 8116 rfs4_client_scv_next(cp_confirmed);
8117 8117 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8118 8118 cp_confirmed->rc_confirm_verf;
8119 8119
8120 8120 rfs4_client_rele(cp_confirmed);
8121 8121 goto out;
8122 8122 }
8123 8123
8124 8124 /*
8125 8125 * Creds match but the verifier doesn't. Must search
8126 8126 * for an unconfirmed client that would be replaced by
8127 8127 * this request.
8128 8128 */
8129 8129 create = FALSE;
8130 8130 cp_unconfirmed = rfs4_findclient(&args->client, &create,
8131 8131 cp_confirmed);
8132 8132 }
8133 8133
8134 8134 /*
8135 8135 * At this point, we have taken care of the brand new client
8136 8136 * struct, INUSE case, update of an existing, and confirmed
8137 8137 * client struct.
8138 8138 */
8139 8139
8140 8140 /*
8141 8141 * check to see if things have changed while we originally
8142 8142 * picked up the client struct. If they have, then return and
8143 8143 * retry the processing of this SETCLIENTID request.
8144 8144 */
8145 8145 if (cp_unconfirmed) {
8146 8146 rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8147 8147 if (!cp_unconfirmed->rc_need_confirm) {
8148 8148 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8149 8149 rfs4_client_rele(cp_unconfirmed);
8150 8150 if (cp_confirmed)
8151 8151 rfs4_client_rele(cp_confirmed);
8152 8152 goto retry;
8153 8153 }
8154 8154 /* do away with the old unconfirmed one */
8155 8155 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8156 8156 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8157 8157 rfs4_client_rele(cp_unconfirmed);
8158 8158 cp_unconfirmed = NULL;
8159 8159 }
8160 8160
8161 8161 /*
8162 8162 * This search will temporarily hide the confirmed client
8163 8163 * struct while a new client struct is created as the
8164 8164 * unconfirmed one.
8165 8165 */
8166 8166 create = TRUE;
8167 8167 newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8168 8168
8169 8169 ASSERT(newcp != NULL);
8170 8170
8171 8171 if (newcp == NULL) {
8172 8172 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8173 8173 rfs4_client_rele(cp_confirmed);
8174 8174 goto out;
8175 8175 }
8176 8176
8177 8177 /*
8178 8178 * If one was not created, then a similar request must be in
8179 8179 * process so release and start over with this one
8180 8180 */
8181 8181 if (create != TRUE) {
8182 8182 rfs4_client_rele(newcp);
8183 8183 if (cp_confirmed)
8184 8184 rfs4_client_rele(cp_confirmed);
8185 8185 goto retry;
8186 8186 }
8187 8187
8188 8188 *cs->statusp = res->status = NFS4_OK;
8189 8189 res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8190 8190 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8191 8191 newcp->rc_confirm_verf;
8192 8192 /* Setup callback information; CB_NULL confirmation later */
8193 8193 rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8194 8194
8195 8195 newcp->rc_cp_confirmed = cp_confirmed;
8196 8196
8197 8197 rfs4_client_rele(newcp);
8198 8198
8199 8199 out:
8200 8200 DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8201 8201 SETCLIENTID4res *, res);
8202 8202 }
8203 8203
8204 8204 /*ARGSUSED*/
8205 8205 void
8206 8206 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8207 8207 struct svc_req *req, struct compound_state *cs)
8208 8208 {
8209 8209 SETCLIENTID_CONFIRM4args *args =
8210 8210 &argop->nfs_argop4_u.opsetclientid_confirm;
8211 8211 SETCLIENTID_CONFIRM4res *res =
8212 8212 &resop->nfs_resop4_u.opsetclientid_confirm;
8213 8213 rfs4_client_t *cp, *cptoclose = NULL;
8214 8214 nfs4_srv_t *nsrv4;
8215 8215
8216 8216 DTRACE_NFSV4_2(op__setclientid__confirm__start,
8217 8217 struct compound_state *, cs,
8218 8218 SETCLIENTID_CONFIRM4args *, args);
8219 8219
8220 8220 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
8221 8221 *cs->statusp = res->status = NFS4_OK;
8222 8222
8223 8223 cp = rfs4_findclient_by_id(args->clientid, TRUE);
8224 8224
8225 8225 if (cp == NULL) {
8226 8226 *cs->statusp = res->status =
8227 8227 rfs4_check_clientid(&args->clientid, 1);
8228 8228 goto out;
8229 8229 }
8230 8230
8231 8231 if (!creds_ok(cp, req, cs)) {
8232 8232 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8233 8233 rfs4_client_rele(cp);
8234 8234 goto out;
8235 8235 }
8236 8236
8237 8237 /* If the verifier doesn't match, the record doesn't match */
8238 8238 if (cp->rc_confirm_verf != args->setclientid_confirm) {
8239 8239 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8240 8240 rfs4_client_rele(cp);
8241 8241 goto out;
8242 8242 }
8243 8243
8244 8244 rfs4_dbe_lock(cp->rc_dbe);
8245 8245 cp->rc_need_confirm = FALSE;
8246 8246 if (cp->rc_cp_confirmed) {
8247 8247 cptoclose = cp->rc_cp_confirmed;
8248 8248 cptoclose->rc_ss_remove = 1;
8249 8249 cp->rc_cp_confirmed = NULL;
8250 8250 }
8251 8251
8252 8252 /*
8253 8253 * Update the client's associated server instance, if it's changed
8254 8254 * since the client was created.
8255 8255 */
8256 8256 if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst)
8257 8257 rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
8258 8258
8259 8259 /*
8260 8260 * Record clientid in stable storage.
8261 8261 * Must be done after server instance has been assigned.
8262 8262 */
8263 8263 rfs4_ss_clid(nsrv4, cp);
8264 8264
8265 8265 rfs4_dbe_unlock(cp->rc_dbe);
8266 8266
8267 8267 if (cptoclose)
8268 8268 /* don't need to rele, client_close does it */
8269 8269 rfs4_client_close(cptoclose);
8270 8270
8271 8271 /* If needed, initiate CB_NULL call for callback path */
8272 8272 rfs4_deleg_cb_check(cp);
8273 8273 rfs4_update_lease(cp);
8274 8274
8275 8275 /*
8276 8276 * Check to see if client can perform reclaims
8277 8277 */
8278 8278 rfs4_ss_chkclid(nsrv4, cp);
8279 8279
8280 8280 rfs4_client_rele(cp);
8281 8281
8282 8282 out:
8283 8283 DTRACE_NFSV4_2(op__setclientid__confirm__done,
8284 8284 struct compound_state *, cs,
8285 8285 SETCLIENTID_CONFIRM4 *, res);
8286 8286 }
8287 8287
8288 8288
8289 8289 /*ARGSUSED*/
8290 8290 void
8291 8291 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8292 8292 struct svc_req *req, struct compound_state *cs)
8293 8293 {
8294 8294 CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8295 8295 CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8296 8296 rfs4_state_t *sp;
8297 8297 nfsstat4 status;
8298 8298
8299 8299 DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8300 8300 CLOSE4args *, args);
8301 8301
8302 8302 if (cs->vp == NULL) {
8303 8303 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8304 8304 goto out;
8305 8305 }
8306 8306
8307 8307 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8308 8308 if (status != NFS4_OK) {
8309 8309 *cs->statusp = resp->status = status;
8310 8310 goto out;
8311 8311 }
8312 8312
8313 8313 /* Ensure specified filehandle matches */
8314 8314 if (cs->vp != sp->rs_finfo->rf_vp) {
8315 8315 rfs4_state_rele(sp);
8316 8316 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8317 8317 goto out;
8318 8318 }
8319 8319
8320 8320 /* hold off other access to open_owner while we tinker */
8321 8321 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8322 8322
8323 8323 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
8324 8324 case NFS4_CHECK_STATEID_OKAY:
8325 8325 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8326 8326 resop) != NFS4_CHKSEQ_OKAY) {
8327 8327 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8328 8328 goto end;
8329 8329 }
8330 8330 break;
8331 8331 case NFS4_CHECK_STATEID_OLD:
8332 8332 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8333 8333 goto end;
8334 8334 case NFS4_CHECK_STATEID_BAD:
8335 8335 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8336 8336 goto end;
8337 8337 case NFS4_CHECK_STATEID_EXPIRED:
8338 8338 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8339 8339 goto end;
8340 8340 case NFS4_CHECK_STATEID_CLOSED:
8341 8341 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8342 8342 goto end;
8343 8343 case NFS4_CHECK_STATEID_UNCONFIRMED:
8344 8344 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8345 8345 goto end;
8346 8346 case NFS4_CHECK_STATEID_REPLAY:
8347 8347 /* Check the sequence id for the open owner */
8348 8348 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8349 8349 resop)) {
8350 8350 case NFS4_CHKSEQ_OKAY:
8351 8351 /*
8352 8352 * This is replayed stateid; if seqid matches
8353 8353 * next expected, then client is using wrong seqid.
8354 8354 */
8355 8355 /* FALL THROUGH */
8356 8356 case NFS4_CHKSEQ_BAD:
8357 8357 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8358 8358 goto end;
8359 8359 case NFS4_CHKSEQ_REPLAY:
8360 8360 /*
8361 8361 * Note this case is the duplicate case so
8362 8362 * resp->status is already set.
8363 8363 */
8364 8364 *cs->statusp = resp->status;
8365 8365 rfs4_update_lease(sp->rs_owner->ro_client);
8366 8366 goto end;
8367 8367 }
8368 8368 break;
8369 8369 default:
8370 8370 ASSERT(FALSE);
8371 8371 break;
8372 8372 }
8373 8373
8374 8374 rfs4_dbe_lock(sp->rs_dbe);
8375 8375
8376 8376 /* Update the stateid. */
8377 8377 next_stateid(&sp->rs_stateid);
8378 8378 resp->open_stateid = sp->rs_stateid.stateid;
8379 8379
8380 8380 rfs4_dbe_unlock(sp->rs_dbe);
8381 8381
8382 8382 rfs4_update_lease(sp->rs_owner->ro_client);
8383 8383 rfs4_update_open_sequence(sp->rs_owner);
8384 8384 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8385 8385
8386 8386 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8387 8387
8388 8388 *cs->statusp = resp->status = status;
8389 8389
8390 8390 end:
8391 8391 rfs4_sw_exit(&sp->rs_owner->ro_sw);
8392 8392 rfs4_state_rele(sp);
8393 8393 out:
8394 8394 DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8395 8395 CLOSE4res *, resp);
8396 8396 }
8397 8397
8398 8398 /*
8399 8399 * Manage the counts on the file struct and close all file locks
8400 8400 */
8401 8401 /*ARGSUSED*/
8402 8402 void
8403 8403 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8404 8404 bool_t close_of_client)
8405 8405 {
8406 8406 rfs4_file_t *fp = sp->rs_finfo;
8407 8407 rfs4_lo_state_t *lsp;
8408 8408 int fflags = 0;
8409 8409
8410 8410 /*
8411 8411 * If this call is part of the larger closing down of client
8412 8412 * state then it is just easier to release all locks
8413 8413 * associated with this client instead of going through each
8414 8414 * individual file and cleaning locks there.
8415 8415 */
8416 8416 if (close_of_client) {
8417 8417 if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8418 8418 !list_is_empty(&sp->rs_lostatelist) &&
8419 8419 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8420 8420 /* Is the PxFS kernel module loaded? */
8421 8421 if (lm_remove_file_locks != NULL) {
8422 8422 int new_sysid;
8423 8423
8424 8424 /* Encode the cluster nodeid in new sysid */
8425 8425 new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8426 8426 lm_set_nlmid_flk(&new_sysid);
8427 8427
8428 8428 /*
8429 8429 * This PxFS routine removes file locks for a
8430 8430 * client over all nodes of a cluster.
8431 8431 */
8432 8432 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8433 8433 "lm_remove_file_locks(sysid=0x%x)\n",
8434 8434 new_sysid));
8435 8435 (*lm_remove_file_locks)(new_sysid);
8436 8436 } else {
8437 8437 struct flock64 flk;
8438 8438
8439 8439 /* Release all locks for this client */
8440 8440 flk.l_type = F_UNLKSYS;
8441 8441 flk.l_whence = 0;
8442 8442 flk.l_start = 0;
8443 8443 flk.l_len = 0;
8444 8444 flk.l_sysid =
8445 8445 sp->rs_owner->ro_client->rc_sysidt;
8446 8446 flk.l_pid = 0;
8447 8447 (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8448 8448 &flk, F_REMOTELOCK | FREAD | FWRITE,
8449 8449 (u_offset_t)0, NULL, CRED(), NULL);
8450 8450 }
8451 8451
8452 8452 sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8453 8453 }
8454 8454 }
8455 8455
8456 8456 /*
8457 8457 * Release all locks on this file by this lock owner or at
8458 8458 * least mark the locks as having been released
8459 8459 */
8460 8460 for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8461 8461 lsp = list_next(&sp->rs_lostatelist, lsp)) {
8462 8462 lsp->rls_locks_cleaned = TRUE;
8463 8463
8464 8464 /* Was this already taken care of above? */
8465 8465 if (!close_of_client &&
8466 8466 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8467 8467 (void) cleanlocks(sp->rs_finfo->rf_vp,
8468 8468 lsp->rls_locker->rl_pid,
8469 8469 lsp->rls_locker->rl_client->rc_sysidt);
8470 8470 }
8471 8471
8472 8472 /*
8473 8473 * Release any shrlocks associated with this open state ID.
8474 8474 * This must be done before the rfs4_state gets marked closed.
8475 8475 */
8476 8476 if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8477 8477 (void) rfs4_unshare(sp);
8478 8478
8479 8479 if (sp->rs_open_access) {
8480 8480 rfs4_dbe_lock(fp->rf_dbe);
8481 8481
8482 8482 /*
8483 8483 * Decrement the count for each access and deny bit that this
8484 8484 * state has contributed to the file.
8485 8485 * If the file counts go to zero
8486 8486 * clear the appropriate bit in the appropriate mask.
8487 8487 */
8488 8488 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8489 8489 fp->rf_access_read--;
8490 8490 fflags |= FREAD;
8491 8491 if (fp->rf_access_read == 0)
8492 8492 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8493 8493 }
8494 8494 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8495 8495 fp->rf_access_write--;
8496 8496 fflags |= FWRITE;
8497 8497 if (fp->rf_access_write == 0)
8498 8498 fp->rf_share_access &=
8499 8499 ~OPEN4_SHARE_ACCESS_WRITE;
8500 8500 }
8501 8501 if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8502 8502 fp->rf_deny_read--;
8503 8503 if (fp->rf_deny_read == 0)
8504 8504 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8505 8505 }
8506 8506 if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8507 8507 fp->rf_deny_write--;
8508 8508 if (fp->rf_deny_write == 0)
8509 8509 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8510 8510 }
8511 8511
8512 8512 (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8513 8513
8514 8514 rfs4_dbe_unlock(fp->rf_dbe);
8515 8515
8516 8516 sp->rs_open_access = 0;
8517 8517 sp->rs_open_deny = 0;
8518 8518 }
8519 8519 }
8520 8520
8521 8521 /*
8522 8522 * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8523 8523 */
8524 8524 static nfsstat4
8525 8525 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8526 8526 {
8527 8527 rfs4_lockowner_t *lo;
8528 8528 rfs4_client_t *cp;
8529 8529 uint32_t len;
8530 8530
8531 8531 lo = rfs4_findlockowner_by_pid(flk->l_pid);
8532 8532 if (lo != NULL) {
8533 8533 cp = lo->rl_client;
8534 8534 if (rfs4_lease_expired(cp)) {
8535 8535 rfs4_lockowner_rele(lo);
8536 8536 rfs4_dbe_hold(cp->rc_dbe);
8537 8537 rfs4_client_close(cp);
8538 8538 return (NFS4ERR_EXPIRED);
8539 8539 }
8540 8540 dp->owner.clientid = lo->rl_owner.clientid;
8541 8541 len = lo->rl_owner.owner_len;
8542 8542 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8543 8543 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8544 8544 dp->owner.owner_len = len;
8545 8545 rfs4_lockowner_rele(lo);
8546 8546 goto finish;
8547 8547 }
8548 8548
8549 8549 /*
8550 8550 * Its not a NFS4 lock. We take advantage that the upper 32 bits
8551 8551 * of the client id contain the boot time for a NFS4 lock. So we
8552 8552 * fabricate and identity by setting clientid to the sysid, and
8553 8553 * the lock owner to the pid.
8554 8554 */
8555 8555 dp->owner.clientid = flk->l_sysid;
8556 8556 len = sizeof (pid_t);
8557 8557 dp->owner.owner_len = len;
8558 8558 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8559 8559 bcopy(&flk->l_pid, dp->owner.owner_val, len);
8560 8560 finish:
8561 8561 dp->offset = flk->l_start;
8562 8562 dp->length = flk->l_len;
8563 8563
8564 8564 if (flk->l_type == F_RDLCK)
8565 8565 dp->locktype = READ_LT;
8566 8566 else if (flk->l_type == F_WRLCK)
8567 8567 dp->locktype = WRITE_LT;
8568 8568 else
8569 8569 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8570 8570
8571 8571 return (NFS4_OK);
8572 8572 }
8573 8573
/*
 * The NFSv4.0 LOCK operation does not support blocking locks (at the
 * NFSv4.0 protocol level), so the client needs to resend the LOCK request
 * whenever the lock is denied by the NFSv4.0 server.  NFSv4.0 clients are
 * prepared for that (obviously); they send the LOCK requests with some delay
 * between the attempts.  See nfs4frlock() and nfs4_block_and_wait() for the
 * locking and delay implementation on the client side.
 *
 * To make life easier for the clients, the NFSv4.0 server does some fast
 * retries on its own (the for loop below) in the hope that the lock will
 * become available soon.  That way the client does not need to resend the
 * LOCK requests so quickly just to check the lock availability.  This
 * saves some network traffic and tries to make sure the client gets the
 * lock ASAP.
 */
8588 8588 static int
8589 8589 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8590 8590 {
8591 8591 int error;
8592 8592 struct flock64 flk;
8593 8593 int i;
8594 8594 clock_t delaytime;
8595 8595 int cmd;
8596 8596 int spin_cnt = 0;
8597 8597
8598 8598 cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8599 8599 retry:
8600 8600 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8601 8601
8602 8602 for (i = 0; i < rfs4_maxlock_tries; i++) {
8603 8603 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8604 8604 error = VOP_FRLOCK(vp, cmd,
8605 8605 flock, flag, (u_offset_t)0, NULL, cred, NULL);
8606 8606
8607 8607 if (error != EAGAIN && error != EACCES)
8608 8608 break;
8609 8609
8610 8610 if (i < rfs4_maxlock_tries - 1) {
8611 8611 delay(delaytime);
8612 8612 delaytime *= 2;
8613 8613 }
8614 8614 }
8615 8615
8616 8616 if (error == EAGAIN || error == EACCES) {
8617 8617 /* Get the owner of the lock */
8618 8618 flk = *flock;
8619 8619 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8620 8620 if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8621 8621 NULL) == 0) {
8622 8622 /*
8623 8623 * There's a race inherent in the current VOP_FRLOCK
8624 8624 * design where:
8625 8625 * a: "other guy" takes a lock that conflicts with a
8626 8626 * lock we want
8627 8627 * b: we attempt to take our lock (non-blocking) and
8628 8628 * the attempt fails.
8629 8629 * c: "other guy" releases the conflicting lock
8630 8630 * d: we ask what lock conflicts with the lock we want,
8631 8631 * getting F_UNLCK (no lock blocks us)
8632 8632 *
8633 8633 * If we retry the non-blocking lock attempt in this
8634 8634 * case (restart at step 'b') there's some possibility
8635 8635 * that many such attempts might fail. However a test
8636 8636 * designed to actually provoke this race shows that
8637 8637 * the vast majority of cases require no retry, and
8638 8638 * only a few took as many as three retries. Here's
8639 8639 * the test outcome:
8640 8640 *
8641 8641 * number of retries how many times we needed
8642 8642 * that many retries
8643 8643 * 0 79461
8644 8644 * 1 862
8645 8645 * 2 49
8646 8646 * 3 5
8647 8647 *
8648 8648 * Given those empirical results, we arbitrarily limit
8649 8649 * the retry count to ten.
8650 8650 *
8651 8651 * If we actually make to ten retries and give up,
8652 8652 * nothing catastrophic happens, but we're unable to
8653 8653 * return the information about the conflicting lock to
8654 8654 * the NFS client. That's an acceptable trade off vs.
8655 8655 * letting this retry loop run forever.
8656 8656 */
8657 8657 if (flk.l_type == F_UNLCK) {
8658 8658 if (spin_cnt++ < 10) {
8659 8659 /* No longer locked, retry */
8660 8660 goto retry;
8661 8661 }
8662 8662 } else {
8663 8663 *flock = flk;
8664 8664 LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8665 8665 F_GETLK, &flk);
8666 8666 }
8667 8667 }
8668 8668 }
8669 8669
8670 8670 return (error);
8671 8671 }
8672 8672
8673 8673 /*ARGSUSED*/
8674 8674 static nfsstat4
8675 8675 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8676 8676 offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8677 8677 {
8678 8678 nfsstat4 status;
8679 8679 rfs4_lockowner_t *lo = lsp->rls_locker;
8680 8680 rfs4_state_t *sp = lsp->rls_state;
8681 8681 struct flock64 flock;
8682 8682 int16_t ltype;
8683 8683 int flag;
8684 8684 int error;
8685 8685 sysid_t sysid;
8686 8686 LOCK4res *lres;
8687 8687 vnode_t *vp;
8688 8688
8689 8689 if (rfs4_lease_expired(lo->rl_client)) {
8690 8690 return (NFS4ERR_EXPIRED);
8691 8691 }
8692 8692
8693 8693 if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8694 8694 return (status);
8695 8695
8696 8696 /* Check for zero length. To lock to end of file use all ones for V4 */
8697 8697 if (length == 0)
8698 8698 return (NFS4ERR_INVAL);
8699 8699 else if (length == (length4)(~0))
8700 8700 length = 0; /* Posix to end of file */
8701 8701
8702 8702 retry:
8703 8703 rfs4_dbe_lock(sp->rs_dbe);
8704 8704 if (sp->rs_closed == TRUE) {
8705 8705 rfs4_dbe_unlock(sp->rs_dbe);
8706 8706 return (NFS4ERR_OLD_STATEID);
8707 8707 }
8708 8708
8709 8709 if (resop->resop != OP_LOCKU) {
8710 8710 switch (locktype) {
8711 8711 case READ_LT:
8712 8712 case READW_LT:
8713 8713 if ((sp->rs_share_access
8714 8714 & OPEN4_SHARE_ACCESS_READ) == 0) {
8715 8715 rfs4_dbe_unlock(sp->rs_dbe);
8716 8716
8717 8717 return (NFS4ERR_OPENMODE);
8718 8718 }
8719 8719 ltype = F_RDLCK;
8720 8720 break;
8721 8721 case WRITE_LT:
8722 8722 case WRITEW_LT:
8723 8723 if ((sp->rs_share_access
8724 8724 & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8725 8725 rfs4_dbe_unlock(sp->rs_dbe);
8726 8726
8727 8727 return (NFS4ERR_OPENMODE);
8728 8728 }
8729 8729 ltype = F_WRLCK;
8730 8730 break;
8731 8731 }
8732 8732 } else
8733 8733 ltype = F_UNLCK;
8734 8734
8735 8735 flock.l_type = ltype;
8736 8736 flock.l_whence = 0; /* SEEK_SET */
8737 8737 flock.l_start = offset;
8738 8738 flock.l_len = length;
8739 8739 flock.l_sysid = sysid;
8740 8740 flock.l_pid = lsp->rls_locker->rl_pid;
8741 8741
8742 8742 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8743 8743 if (flock.l_len < 0 || flock.l_start < 0) {
8744 8744 rfs4_dbe_unlock(sp->rs_dbe);
8745 8745 return (NFS4ERR_INVAL);
8746 8746 }
8747 8747
8748 8748 /*
8749 8749 * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
8750 8750 * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
8751 8751 */
8752 8752 flag = (int)sp->rs_share_access | F_REMOTELOCK;
8753 8753
8754 8754 vp = sp->rs_finfo->rf_vp;
8755 8755 VN_HOLD(vp);
8756 8756
8757 8757 /*
8758 8758 * We need to unlock sp before we call the underlying filesystem to
8759 8759 * acquire the file lock.
8760 8760 */
8761 8761 rfs4_dbe_unlock(sp->rs_dbe);
8762 8762
8763 8763 error = setlock(vp, &flock, flag, cred);
8764 8764
8765 8765 /*
8766 8766 * Make sure the file is still open. In a case the file was closed in
8767 8767 * the meantime, clean the lock we acquired using the setlock() call
8768 8768 * above, and return the appropriate error.
8769 8769 */
8770 8770 rfs4_dbe_lock(sp->rs_dbe);
8771 8771 if (sp->rs_closed == TRUE) {
8772 8772 cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
8773 8773 rfs4_dbe_unlock(sp->rs_dbe);
8774 8774
8775 8775 VN_RELE(vp);
8776 8776
8777 8777 return (NFS4ERR_OLD_STATEID);
8778 8778 }
8779 8779 rfs4_dbe_unlock(sp->rs_dbe);
8780 8780
8781 8781 VN_RELE(vp);
8782 8782
8783 8783 if (error == 0) {
8784 8784 rfs4_dbe_lock(lsp->rls_dbe);
8785 8785 next_stateid(&lsp->rls_lockid);
8786 8786 rfs4_dbe_unlock(lsp->rls_dbe);
8787 8787 }
8788 8788
8789 8789 /*
8790 8790 * N.B. We map error values to nfsv4 errors. This is differrent
8791 8791 * than puterrno4 routine.
8792 8792 */
8793 8793 switch (error) {
8794 8794 case 0:
8795 8795 status = NFS4_OK;
8796 8796 break;
8797 8797 case EAGAIN:
8798 8798 case EACCES: /* Old value */
8799 8799 /* Can only get here if op is OP_LOCK */
8800 8800 ASSERT(resop->resop == OP_LOCK);
8801 8801 lres = &resop->nfs_resop4_u.oplock;
8802 8802 status = NFS4ERR_DENIED;
8803 8803 if (lock_denied(&lres->LOCK4res_u.denied, &flock)
8804 8804 == NFS4ERR_EXPIRED)
8805 8805 goto retry;
8806 8806 break;
8807 8807 case ENOLCK:
8808 8808 status = NFS4ERR_DELAY;
8809 8809 break;
8810 8810 case EOVERFLOW:
8811 8811 status = NFS4ERR_INVAL;
8812 8812 break;
8813 8813 case EINVAL:
8814 8814 status = NFS4ERR_NOTSUPP;
8815 8815 break;
8816 8816 default:
8817 8817 status = NFS4ERR_SERVERFAULT;
8818 8818 break;
8819 8819 }
8820 8820
8821 8821 return (status);
8822 8822 }
8823 8823
8824 8824 /*ARGSUSED*/
8825 8825 void
8826 8826 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
8827 8827 struct svc_req *req, struct compound_state *cs)
8828 8828 {
8829 8829 LOCK4args *args = &argop->nfs_argop4_u.oplock;
8830 8830 LOCK4res *resp = &resop->nfs_resop4_u.oplock;
8831 8831 nfsstat4 status;
8832 8832 stateid4 *stateid;
8833 8833 rfs4_lockowner_t *lo;
8834 8834 rfs4_client_t *cp;
8835 8835 rfs4_state_t *sp = NULL;
8836 8836 rfs4_lo_state_t *lsp = NULL;
8837 8837 bool_t ls_sw_held = FALSE;
8838 8838 bool_t create = TRUE;
8839 8839 bool_t lcreate = TRUE;
8840 8840 bool_t dup_lock = FALSE;
8841 8841 int rc;
8842 8842
8843 8843 DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
8844 8844 LOCK4args *, args);
8845 8845
8846 8846 if (cs->vp == NULL) {
8847 8847 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8848 8848 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8849 8849 cs, LOCK4res *, resp);
8850 8850 return;
8851 8851 }
8852 8852
8853 8853 if (args->locker.new_lock_owner) {
8854 8854 /* Create a new lockowner for this instance */
8855 8855 open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
8856 8856
8857 8857 NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
8858 8858
8859 8859 stateid = &olo->open_stateid;
8860 8860 status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
8861 8861 if (status != NFS4_OK) {
8862 8862 NFS4_DEBUG(rfs4_debug,
8863 8863 (CE_NOTE, "Get state failed in lock %d", status));
8864 8864 *cs->statusp = resp->status = status;
8865 8865 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8866 8866 cs, LOCK4res *, resp);
8867 8867 return;
8868 8868 }
8869 8869
8870 8870 /* Ensure specified filehandle matches */
8871 8871 if (cs->vp != sp->rs_finfo->rf_vp) {
8872 8872 rfs4_state_rele(sp);
8873 8873 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8874 8874 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8875 8875 cs, LOCK4res *, resp);
8876 8876 return;
8877 8877 }
8878 8878
8879 8879 /* hold off other access to open_owner while we tinker */
8880 8880 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8881 8881
8882 8882 switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
8883 8883 case NFS4_CHECK_STATEID_OLD:
8884 8884 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8885 8885 goto end;
8886 8886 case NFS4_CHECK_STATEID_BAD:
8887 8887 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8888 8888 goto end;
8889 8889 case NFS4_CHECK_STATEID_EXPIRED:
8890 8890 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8891 8891 goto end;
8892 8892 case NFS4_CHECK_STATEID_UNCONFIRMED:
8893 8893 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8894 8894 goto end;
8895 8895 case NFS4_CHECK_STATEID_CLOSED:
8896 8896 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8897 8897 goto end;
8898 8898 case NFS4_CHECK_STATEID_OKAY:
8899 8899 case NFS4_CHECK_STATEID_REPLAY:
8900 8900 switch (rfs4_check_olo_seqid(olo->open_seqid,
8901 8901 sp->rs_owner, resop)) {
8902 8902 case NFS4_CHKSEQ_OKAY:
8903 8903 if (rc == NFS4_CHECK_STATEID_OKAY)
8904 8904 break;
8905 8905 /*
8906 8906 * This is replayed stateid; if seqid
8907 8907 * matches next expected, then client
8908 8908 * is using wrong seqid.
8909 8909 */
8910 8910 /* FALLTHROUGH */
8911 8911 case NFS4_CHKSEQ_BAD:
8912 8912 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8913 8913 goto end;
8914 8914 case NFS4_CHKSEQ_REPLAY:
8915 8915 /* This is a duplicate LOCK request */
8916 8916 dup_lock = TRUE;
8917 8917
8918 8918 /*
8919 8919 * For a duplicate we do not want to
8920 8920 * create a new lockowner as it should
8921 8921 * already exist.
8922 8922 * Turn off the lockowner create flag.
8923 8923 */
8924 8924 lcreate = FALSE;
8925 8925 }
8926 8926 break;
8927 8927 }
8928 8928
8929 8929 lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
8930 8930 if (lo == NULL) {
8931 8931 NFS4_DEBUG(rfs4_debug,
8932 8932 (CE_NOTE, "rfs4_op_lock: no lock owner"));
8933 8933 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
8934 8934 goto end;
8935 8935 }
8936 8936
8937 8937 lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
8938 8938 if (lsp == NULL) {
8939 8939 rfs4_update_lease(sp->rs_owner->ro_client);
8940 8940 /*
8941 8941 * Only update theh open_seqid if this is not
8942 8942 * a duplicate request
8943 8943 */
8944 8944 if (dup_lock == FALSE) {
8945 8945 rfs4_update_open_sequence(sp->rs_owner);
8946 8946 }
8947 8947
8948 8948 NFS4_DEBUG(rfs4_debug,
8949 8949 (CE_NOTE, "rfs4_op_lock: no state"));
8950 8950 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8951 8951 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8952 8952 rfs4_lockowner_rele(lo);
8953 8953 goto end;
8954 8954 }
8955 8955
8956 8956 /*
8957 8957 * This is the new_lock_owner branch and the client is
8958 8958 * supposed to be associating a new lock_owner with
8959 8959 * the open file at this point. If we find that a
8960 8960 * lock_owner/state association already exists and a
8961 8961 * successful LOCK request was returned to the client,
8962 8962 * an error is returned to the client since this is
8963 8963 * not appropriate. The client should be using the
8964 8964 * existing lock_owner branch.
8965 8965 */
8966 8966 if (dup_lock == FALSE && create == FALSE) {
8967 8967 if (lsp->rls_lock_completed == TRUE) {
8968 8968 *cs->statusp =
8969 8969 resp->status = NFS4ERR_BAD_SEQID;
8970 8970 rfs4_lockowner_rele(lo);
8971 8971 goto end;
8972 8972 }
8973 8973 }
8974 8974
8975 8975 rfs4_update_lease(sp->rs_owner->ro_client);
8976 8976
8977 8977 /*
8978 8978 * Only update theh open_seqid if this is not
8979 8979 * a duplicate request
8980 8980 */
8981 8981 if (dup_lock == FALSE) {
8982 8982 rfs4_update_open_sequence(sp->rs_owner);
8983 8983 }
8984 8984
8985 8985 /*
8986 8986 * If this is a duplicate lock request, just copy the
8987 8987 * previously saved reply and return.
8988 8988 */
8989 8989 if (dup_lock == TRUE) {
8990 8990 /* verify that lock_seqid's match */
8991 8991 if (lsp->rls_seqid != olo->lock_seqid) {
8992 8992 NFS4_DEBUG(rfs4_debug,
8993 8993 (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
8994 8994 "lsp->seqid=%d old->seqid=%d",
8995 8995 lsp->rls_seqid, olo->lock_seqid));
8996 8996 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8997 8997 } else {
8998 8998 rfs4_copy_reply(resop, &lsp->rls_reply);
8999 8999 /*
9000 9000 * Make sure to copy the just
9001 9001 * retrieved reply status into the
9002 9002 * overall compound status
9003 9003 */
9004 9004 *cs->statusp = resp->status;
9005 9005 }
9006 9006 rfs4_lockowner_rele(lo);
9007 9007 goto end;
9008 9008 }
9009 9009
9010 9010 rfs4_dbe_lock(lsp->rls_dbe);
9011 9011
9012 9012 /* Make sure to update the lock sequence id */
9013 9013 lsp->rls_seqid = olo->lock_seqid;
9014 9014
9015 9015 NFS4_DEBUG(rfs4_debug,
9016 9016 (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
9017 9017
9018 9018 /*
9019 9019 * This is used to signify the newly created lockowner
9020 9020 * stateid and its sequence number. The checks for
9021 9021 * sequence number and increment don't occur on the
9022 9022 * very first lock request for a lockowner.
9023 9023 */
9024 9024 lsp->rls_skip_seqid_check = TRUE;
9025 9025
9026 9026 /* hold off other access to lsp while we tinker */
9027 9027 rfs4_sw_enter(&lsp->rls_sw);
9028 9028 ls_sw_held = TRUE;
9029 9029
9030 9030 rfs4_dbe_unlock(lsp->rls_dbe);
9031 9031
9032 9032 rfs4_lockowner_rele(lo);
9033 9033 } else {
9034 9034 stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
9035 9035 /* get lsp and hold the lock on the underlying file struct */
9036 9036 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
9037 9037 != NFS4_OK) {
9038 9038 *cs->statusp = resp->status = status;
9039 9039 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9040 9040 cs, LOCK4res *, resp);
9041 9041 return;
9042 9042 }
9043 9043 create = FALSE; /* We didn't create lsp */
9044 9044
9045 9045 /* Ensure specified filehandle matches */
9046 9046 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9047 9047 rfs4_lo_state_rele(lsp, TRUE);
9048 9048 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9049 9049 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9050 9050 cs, LOCK4res *, resp);
9051 9051 return;
9052 9052 }
9053 9053
9054 9054 /* hold off other access to lsp while we tinker */
9055 9055 rfs4_sw_enter(&lsp->rls_sw);
9056 9056 ls_sw_held = TRUE;
9057 9057
9058 9058 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9059 9059 /*
9060 9060 * The stateid looks like it was okay (expected to be
9061 9061 * the next one)
9062 9062 */
9063 9063 case NFS4_CHECK_STATEID_OKAY:
9064 9064 /*
9065 9065 * The sequence id is now checked. Determine
9066 9066 * if this is a replay or if it is in the
9067 9067 * expected (next) sequence. In the case of a
9068 9068 * replay, there are two replay conditions
9069 9069 * that may occur. The first is the normal
9070 9070 * condition where a LOCK is done with a
9071 9071 * NFS4_OK response and the stateid is
9072 9072 * updated. That case is handled below when
9073 9073 * the stateid is identified as a REPLAY. The
9074 9074 * second is the case where an error is
9075 9075 * returned, like NFS4ERR_DENIED, and the
9076 9076 * sequence number is updated but the stateid
9077 9077 * is not updated. This second case is dealt
9078 9078 * with here. So it may seem odd that the
9079 9079 * stateid is okay but the sequence id is a
9080 9080 * replay but it is okay.
9081 9081 */
9082 9082 switch (rfs4_check_lock_seqid(
9083 9083 args->locker.locker4_u.lock_owner.lock_seqid,
9084 9084 lsp, resop)) {
9085 9085 case NFS4_CHKSEQ_REPLAY:
9086 9086 if (resp->status != NFS4_OK) {
9087 9087 /*
9088 9088 * Here is our replay and need
9089 9089 * to verify that the last
9090 9090 * response was an error.
9091 9091 */
9092 9092 *cs->statusp = resp->status;
9093 9093 goto end;
9094 9094 }
9095 9095 /*
9096 9096 * This is done since the sequence id
9097 9097 * looked like a replay but it didn't
9098 9098 * pass our check so a BAD_SEQID is
9099 9099 * returned as a result.
9100 9100 */
9101 9101 /*FALLTHROUGH*/
9102 9102 case NFS4_CHKSEQ_BAD:
9103 9103 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9104 9104 goto end;
9105 9105 case NFS4_CHKSEQ_OKAY:
9106 9106 /* Everything looks okay move ahead */
9107 9107 break;
9108 9108 }
9109 9109 break;
9110 9110 case NFS4_CHECK_STATEID_OLD:
9111 9111 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9112 9112 goto end;
9113 9113 case NFS4_CHECK_STATEID_BAD:
9114 9114 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9115 9115 goto end;
9116 9116 case NFS4_CHECK_STATEID_EXPIRED:
9117 9117 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9118 9118 goto end;
9119 9119 case NFS4_CHECK_STATEID_CLOSED:
9120 9120 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9121 9121 goto end;
9122 9122 case NFS4_CHECK_STATEID_REPLAY:
9123 9123 switch (rfs4_check_lock_seqid(
9124 9124 args->locker.locker4_u.lock_owner.lock_seqid,
9125 9125 lsp, resop)) {
9126 9126 case NFS4_CHKSEQ_OKAY:
9127 9127 /*
9128 9128 * This is a replayed stateid; if
9129 9129 * seqid matches the next expected,
9130 9130 * then client is using wrong seqid.
9131 9131 */
9132 9132 case NFS4_CHKSEQ_BAD:
9133 9133 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9134 9134 goto end;
9135 9135 case NFS4_CHKSEQ_REPLAY:
9136 9136 rfs4_update_lease(lsp->rls_locker->rl_client);
9137 9137 *cs->statusp = status = resp->status;
9138 9138 goto end;
9139 9139 }
9140 9140 break;
9141 9141 default:
9142 9142 ASSERT(FALSE);
9143 9143 break;
9144 9144 }
9145 9145
9146 9146 rfs4_update_lock_sequence(lsp);
9147 9147 rfs4_update_lease(lsp->rls_locker->rl_client);
9148 9148 }
9149 9149
9150 9150 /*
9151 9151 * NFS4 only allows locking on regular files, so
9152 9152 * verify type of object.
9153 9153 */
9154 9154 if (cs->vp->v_type != VREG) {
9155 9155 if (cs->vp->v_type == VDIR)
9156 9156 status = NFS4ERR_ISDIR;
9157 9157 else
9158 9158 status = NFS4ERR_INVAL;
9159 9159 goto out;
9160 9160 }
9161 9161
9162 9162 cp = lsp->rls_state->rs_owner->ro_client;
9163 9163
9164 9164 if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9165 9165 status = NFS4ERR_GRACE;
9166 9166 goto out;
9167 9167 }
9168 9168
9169 9169 if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9170 9170 status = NFS4ERR_NO_GRACE;
9171 9171 goto out;
9172 9172 }
9173 9173
9174 9174 if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9175 9175 status = NFS4ERR_NO_GRACE;
9176 9176 goto out;
9177 9177 }
9178 9178
9179 9179 if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9180 9180 cs->deleg = TRUE;
9181 9181
9182 9182 status = rfs4_do_lock(lsp, args->locktype,
9183 9183 args->offset, args->length, cs->cr, resop);
9184 9184
9185 9185 out:
9186 9186 lsp->rls_skip_seqid_check = FALSE;
9187 9187
9188 9188 *cs->statusp = resp->status = status;
9189 9189
9190 9190 if (status == NFS4_OK) {
9191 9191 resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9192 9192 lsp->rls_lock_completed = TRUE;
9193 9193 }
9194 9194 /*
9195 9195 * Only update the "OPEN" response here if this was a new
9196 9196 * lock_owner
9197 9197 */
9198 9198 if (sp)
9199 9199 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9200 9200
9201 9201 rfs4_update_lock_resp(lsp, resop);
9202 9202
9203 9203 end:
9204 9204 if (lsp) {
9205 9205 if (ls_sw_held)
9206 9206 rfs4_sw_exit(&lsp->rls_sw);
9207 9207 /*
9208 9208 * If an sp obtained, then the lsp does not represent
9209 9209 * a lock on the file struct.
9210 9210 */
9211 9211 if (sp != NULL)
9212 9212 rfs4_lo_state_rele(lsp, FALSE);
9213 9213 else
9214 9214 rfs4_lo_state_rele(lsp, TRUE);
9215 9215 }
9216 9216 if (sp) {
9217 9217 rfs4_sw_exit(&sp->rs_owner->ro_sw);
9218 9218 rfs4_state_rele(sp);
9219 9219 }
9220 9220
9221 9221 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9222 9222 LOCK4res *, resp);
9223 9223 }
9224 9224
9225 9225 /* free function for LOCK/LOCKT */
9226 9226 static void
9227 9227 lock_denied_free(nfs_resop4 *resop)
9228 9228 {
9229 9229 LOCK4denied *dp = NULL;
9230 9230
9231 9231 switch (resop->resop) {
9232 9232 case OP_LOCK:
9233 9233 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9234 9234 dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9235 9235 break;
9236 9236 case OP_LOCKT:
9237 9237 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9238 9238 dp = &resop->nfs_resop4_u.oplockt.denied;
9239 9239 break;
9240 9240 default:
9241 9241 break;
9242 9242 }
9243 9243
9244 9244 if (dp)
9245 9245 kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9246 9246 }
9247 9247
9248 9248 /*ARGSUSED*/
9249 9249 void
9250 9250 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
9251 9251 struct svc_req *req, struct compound_state *cs)
9252 9252 {
9253 9253 LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9254 9254 LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9255 9255 nfsstat4 status;
9256 9256 stateid4 *stateid = &args->lock_stateid;
9257 9257 rfs4_lo_state_t *lsp;
9258 9258
9259 9259 DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9260 9260 LOCKU4args *, args);
9261 9261
9262 9262 if (cs->vp == NULL) {
9263 9263 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9264 9264 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9265 9265 LOCKU4res *, resp);
9266 9266 return;
9267 9267 }
9268 9268
9269 9269 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9270 9270 *cs->statusp = resp->status = status;
9271 9271 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9272 9272 LOCKU4res *, resp);
9273 9273 return;
9274 9274 }
9275 9275
9276 9276 /* Ensure specified filehandle matches */
9277 9277 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9278 9278 rfs4_lo_state_rele(lsp, TRUE);
9279 9279 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9280 9280 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9281 9281 LOCKU4res *, resp);
9282 9282 return;
9283 9283 }
9284 9284
9285 9285 /* hold off other access to lsp while we tinker */
9286 9286 rfs4_sw_enter(&lsp->rls_sw);
9287 9287
9288 9288 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9289 9289 case NFS4_CHECK_STATEID_OKAY:
9290 9290 if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9291 9291 != NFS4_CHKSEQ_OKAY) {
9292 9292 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9293 9293 goto end;
9294 9294 }
9295 9295 break;
9296 9296 case NFS4_CHECK_STATEID_OLD:
9297 9297 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9298 9298 goto end;
9299 9299 case NFS4_CHECK_STATEID_BAD:
9300 9300 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9301 9301 goto end;
9302 9302 case NFS4_CHECK_STATEID_EXPIRED:
9303 9303 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9304 9304 goto end;
9305 9305 case NFS4_CHECK_STATEID_CLOSED:
9306 9306 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9307 9307 goto end;
9308 9308 case NFS4_CHECK_STATEID_REPLAY:
9309 9309 switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9310 9310 case NFS4_CHKSEQ_OKAY:
9311 9311 /*
9312 9312 * This is a replayed stateid; if
9313 9313 * seqid matches the next expected,
9314 9314 * then client is using wrong seqid.
9315 9315 */
9316 9316 case NFS4_CHKSEQ_BAD:
9317 9317 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9318 9318 goto end;
9319 9319 case NFS4_CHKSEQ_REPLAY:
9320 9320 rfs4_update_lease(lsp->rls_locker->rl_client);
9321 9321 *cs->statusp = status = resp->status;
9322 9322 goto end;
9323 9323 }
9324 9324 break;
9325 9325 default:
9326 9326 ASSERT(FALSE);
9327 9327 break;
9328 9328 }
9329 9329
9330 9330 rfs4_update_lock_sequence(lsp);
9331 9331 rfs4_update_lease(lsp->rls_locker->rl_client);
9332 9332
9333 9333 /*
9334 9334 * NFS4 only allows locking on regular files, so
9335 9335 * verify type of object.
9336 9336 */
9337 9337 if (cs->vp->v_type != VREG) {
9338 9338 if (cs->vp->v_type == VDIR)
9339 9339 status = NFS4ERR_ISDIR;
9340 9340 else
9341 9341 status = NFS4ERR_INVAL;
9342 9342 goto out;
9343 9343 }
9344 9344
9345 9345 if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9346 9346 status = NFS4ERR_GRACE;
9347 9347 goto out;
9348 9348 }
9349 9349
9350 9350 status = rfs4_do_lock(lsp, args->locktype,
9351 9351 args->offset, args->length, cs->cr, resop);
9352 9352
9353 9353 out:
9354 9354 *cs->statusp = resp->status = status;
9355 9355
9356 9356 if (status == NFS4_OK)
9357 9357 resp->lock_stateid = lsp->rls_lockid.stateid;
9358 9358
9359 9359 rfs4_update_lock_resp(lsp, resop);
9360 9360
9361 9361 end:
9362 9362 rfs4_sw_exit(&lsp->rls_sw);
9363 9363 rfs4_lo_state_rele(lsp, TRUE);
9364 9364
9365 9365 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9366 9366 LOCKU4res *, resp);
9367 9367 }
9368 9368
9369 9369 /*
9370 9370 * LOCKT is a best effort routine, the client can not be guaranteed that
9371 9371 * the status return is still in effect by the time the reply is received.
9372 9372 * They are numerous race conditions in this routine, but we are not required
9373 9373 * and can not be accurate.
9374 9374 */
9375 9375 /*ARGSUSED*/
9376 9376 void
9377 9377 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9378 9378 struct svc_req *req, struct compound_state *cs)
9379 9379 {
9380 9380 LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9381 9381 LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9382 9382 rfs4_lockowner_t *lo;
9383 9383 rfs4_client_t *cp;
9384 9384 bool_t create = FALSE;
9385 9385 struct flock64 flk;
9386 9386 int error;
9387 9387 int flag = FREAD | FWRITE;
9388 9388 int ltype;
9389 9389 length4 posix_length;
9390 9390 sysid_t sysid;
9391 9391 pid_t pid;
9392 9392
9393 9393 DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9394 9394 LOCKT4args *, args);
9395 9395
9396 9396 if (cs->vp == NULL) {
9397 9397 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9398 9398 goto out;
9399 9399 }
9400 9400
9401 9401 /*
9402 9402 * NFS4 only allows locking on regular files, so
9403 9403 * verify type of object.
9404 9404 */
9405 9405 if (cs->vp->v_type != VREG) {
9406 9406 if (cs->vp->v_type == VDIR)
9407 9407 *cs->statusp = resp->status = NFS4ERR_ISDIR;
9408 9408 else
9409 9409 *cs->statusp = resp->status = NFS4ERR_INVAL;
9410 9410 goto out;
9411 9411 }
9412 9412
9413 9413 /*
9414 9414 * Check out the clientid to ensure the server knows about it
9415 9415 * so that we correctly inform the client of a server reboot.
9416 9416 */
9417 9417 if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9418 9418 == NULL) {
9419 9419 *cs->statusp = resp->status =
9420 9420 rfs4_check_clientid(&args->owner.clientid, 0);
9421 9421 goto out;
9422 9422 }
9423 9423 if (rfs4_lease_expired(cp)) {
9424 9424 rfs4_client_close(cp);
9425 9425 /*
9426 9426 * Protocol doesn't allow returning NFS4ERR_STALE as
9427 9427 * other operations do on this check so STALE_CLIENTID
9428 9428 * is returned instead
9429 9429 */
9430 9430 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9431 9431 goto out;
9432 9432 }
9433 9433
9434 9434 if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9435 9435 *cs->statusp = resp->status = NFS4ERR_GRACE;
9436 9436 rfs4_client_rele(cp);
9437 9437 goto out;
9438 9438 }
9439 9439 rfs4_client_rele(cp);
9440 9440
9441 9441 resp->status = NFS4_OK;
9442 9442
9443 9443 switch (args->locktype) {
9444 9444 case READ_LT:
9445 9445 case READW_LT:
9446 9446 ltype = F_RDLCK;
9447 9447 break;
9448 9448 case WRITE_LT:
9449 9449 case WRITEW_LT:
9450 9450 ltype = F_WRLCK;
9451 9451 break;
9452 9452 }
9453 9453
9454 9454 posix_length = args->length;
9455 9455 /* Check for zero length. To lock to end of file use all ones for V4 */
9456 9456 if (posix_length == 0) {
9457 9457 *cs->statusp = resp->status = NFS4ERR_INVAL;
9458 9458 goto out;
9459 9459 } else if (posix_length == (length4)(~0)) {
9460 9460 posix_length = 0; /* Posix to end of file */
9461 9461 }
9462 9462
9463 9463 /* Find or create a lockowner */
9464 9464 lo = rfs4_findlockowner(&args->owner, &create);
9465 9465
9466 9466 if (lo) {
9467 9467 pid = lo->rl_pid;
9468 9468 if ((resp->status =
9469 9469 rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9470 9470 goto err;
9471 9471 } else {
9472 9472 pid = 0;
9473 9473 sysid = lockt_sysid;
9474 9474 }
9475 9475 retry:
9476 9476 flk.l_type = ltype;
9477 9477 flk.l_whence = 0; /* SEEK_SET */
9478 9478 flk.l_start = args->offset;
9479 9479 flk.l_len = posix_length;
9480 9480 flk.l_sysid = sysid;
9481 9481 flk.l_pid = pid;
9482 9482 flag |= F_REMOTELOCK;
9483 9483
9484 9484 LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9485 9485
9486 9486 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9487 9487 if (flk.l_len < 0 || flk.l_start < 0) {
9488 9488 resp->status = NFS4ERR_INVAL;
9489 9489 goto err;
9490 9490 }
9491 9491 error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9492 9492 NULL, cs->cr, NULL);
9493 9493
9494 9494 /*
9495 9495 * N.B. We map error values to nfsv4 errors. This is differrent
9496 9496 * than puterrno4 routine.
9497 9497 */
9498 9498 switch (error) {
9499 9499 case 0:
9500 9500 if (flk.l_type == F_UNLCK)
9501 9501 resp->status = NFS4_OK;
9502 9502 else {
9503 9503 if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9504 9504 goto retry;
9505 9505 resp->status = NFS4ERR_DENIED;
9506 9506 }
9507 9507 break;
9508 9508 case EOVERFLOW:
9509 9509 resp->status = NFS4ERR_INVAL;
9510 9510 break;
9511 9511 case EINVAL:
9512 9512 resp->status = NFS4ERR_NOTSUPP;
9513 9513 break;
9514 9514 default:
9515 9515 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9516 9516 error);
9517 9517 resp->status = NFS4ERR_SERVERFAULT;
9518 9518 break;
9519 9519 }
9520 9520
9521 9521 err:
9522 9522 if (lo)
9523 9523 rfs4_lockowner_rele(lo);
9524 9524 *cs->statusp = resp->status;
9525 9525 out:
9526 9526 DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9527 9527 LOCKT4res *, resp);
9528 9528 }
9529 9529
9530 9530 int
9531 9531 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9532 9532 {
9533 9533 int err;
9534 9534 int cmd;
9535 9535 vnode_t *vp;
9536 9536 struct shrlock shr;
9537 9537 struct shr_locowner shr_loco;
9538 9538 int fflags = 0;
9539 9539
9540 9540 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9541 9541 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9542 9542
9543 9543 if (sp->rs_closed)
9544 9544 return (NFS4ERR_OLD_STATEID);
9545 9545
9546 9546 vp = sp->rs_finfo->rf_vp;
9547 9547 ASSERT(vp);
9548 9548
9549 9549 shr.s_access = shr.s_deny = 0;
9550 9550
9551 9551 if (access & OPEN4_SHARE_ACCESS_READ) {
9552 9552 fflags |= FREAD;
9553 9553 shr.s_access |= F_RDACC;
9554 9554 }
9555 9555 if (access & OPEN4_SHARE_ACCESS_WRITE) {
9556 9556 fflags |= FWRITE;
9557 9557 shr.s_access |= F_WRACC;
9558 9558 }
9559 9559 ASSERT(shr.s_access);
9560 9560
9561 9561 if (deny & OPEN4_SHARE_DENY_READ)
9562 9562 shr.s_deny |= F_RDDNY;
9563 9563 if (deny & OPEN4_SHARE_DENY_WRITE)
9564 9564 shr.s_deny |= F_WRDNY;
9565 9565
9566 9566 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9567 9567 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9568 9568 shr_loco.sl_pid = shr.s_pid;
9569 9569 shr_loco.sl_id = shr.s_sysid;
9570 9570 shr.s_owner = (caddr_t)&shr_loco;
9571 9571 shr.s_own_len = sizeof (shr_loco);
9572 9572
9573 9573 cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9574 9574
9575 9575 err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9576 9576 if (err != 0) {
9577 9577 if (err == EAGAIN)
9578 9578 err = NFS4ERR_SHARE_DENIED;
9579 9579 else
9580 9580 err = puterrno4(err);
9581 9581 return (err);
9582 9582 }
9583 9583
9584 9584 sp->rs_share_access |= access;
9585 9585 sp->rs_share_deny |= deny;
9586 9586
9587 9587 return (0);
9588 9588 }
9589 9589
9590 9590 int
9591 9591 rfs4_unshare(rfs4_state_t *sp)
9592 9592 {
9593 9593 int err;
9594 9594 struct shrlock shr;
9595 9595 struct shr_locowner shr_loco;
9596 9596
9597 9597 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9598 9598
9599 9599 if (sp->rs_closed || sp->rs_share_access == 0)
9600 9600 return (0);
9601 9601
9602 9602 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9603 9603 ASSERT(sp->rs_finfo->rf_vp);
9604 9604
9605 9605 shr.s_access = shr.s_deny = 0;
9606 9606 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9607 9607 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9608 9608 shr_loco.sl_pid = shr.s_pid;
9609 9609 shr_loco.sl_id = shr.s_sysid;
9610 9610 shr.s_owner = (caddr_t)&shr_loco;
9611 9611 shr.s_own_len = sizeof (shr_loco);
9612 9612
9613 9613 err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9614 9614 NULL);
9615 9615 if (err != 0) {
9616 9616 err = puterrno4(err);
9617 9617 return (err);
9618 9618 }
9619 9619
9620 9620 sp->rs_share_access = 0;
9621 9621 sp->rs_share_deny = 0;
9622 9622
9623 9623 return (0);
9624 9624
9625 9625 }
9626 9626
9627 9627 static int
9628 9628 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9629 9629 {
9630 9630 struct clist *wcl;
9631 9631 count4 count = rok->data_len;
9632 9632 int wlist_len;
9633 9633
9634 9634 wcl = args->wlist;
9635 9635 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9636 9636 return (FALSE);
9637 9637 }
9638 9638 wcl = args->wlist;
9639 9639 rok->wlist_len = wlist_len;
9640 9640 rok->wlist = wcl;
9641 9641 return (TRUE);
9642 9642 }
9643 9643
/*
 * Tunable to disable server referrals: when non-zero,
 * vn_is_nfs_reparse() reports B_FALSE without examining the vnode.
 */
int rfs4_no_referrals = 0;
9646 9646
9647 9647 /*
9648 9648 * Find an NFS record in reparse point data.
9649 9649 * Returns 0 for success and <0 or an errno value on failure.
9650 9650 */
9651 9651 int
9652 9652 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9653 9653 {
9654 9654 int err;
9655 9655 char *stype, *val;
9656 9656 nvlist_t *nvl;
9657 9657 nvpair_t *curr;
9658 9658
9659 9659 if ((nvl = reparse_init()) == NULL)
9660 9660 return (-1);
9661 9661
9662 9662 if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9663 9663 reparse_free(nvl);
9664 9664 return (err);
9665 9665 }
9666 9666
9667 9667 curr = NULL;
9668 9668 while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9669 9669 if ((stype = nvpair_name(curr)) == NULL) {
9670 9670 reparse_free(nvl);
9671 9671 return (-2);
9672 9672 }
9673 9673 if (strncasecmp(stype, "NFS", 3) == 0)
9674 9674 break;
9675 9675 }
9676 9676
9677 9677 if ((curr == NULL) ||
9678 9678 (nvpair_value_string(curr, &val))) {
9679 9679 reparse_free(nvl);
9680 9680 return (-3);
9681 9681 }
9682 9682 *nvlp = nvl;
9683 9683 *svcp = stype;
9684 9684 *datap = val;
9685 9685 return (0);
9686 9686 }
9687 9687
9688 9688 int
9689 9689 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9690 9690 {
9691 9691 nvlist_t *nvl;
9692 9692 char *s, *d;
9693 9693
9694 9694 if (rfs4_no_referrals != 0)
9695 9695 return (B_FALSE);
9696 9696
9697 9697 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9698 9698 return (B_FALSE);
9699 9699
9700 9700 if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9701 9701 return (B_FALSE);
9702 9702
9703 9703 reparse_free(nvl);
9704 9704
9705 9705 return (B_TRUE);
9706 9706 }
9707 9707
9708 9708 /*
9709 9709 * There is a user-level copy of this routine in ref_subr.c.
9710 9710 * Changes should be kept in sync.
9711 9711 */
9712 9712 static int
9713 9713 nfs4_create_components(char *path, component4 *comp4)
9714 9714 {
9715 9715 int slen, plen, ncomp;
9716 9716 char *ori_path, *nxtc, buf[MAXNAMELEN];
9717 9717
9718 9718 if (path == NULL)
9719 9719 return (0);
9720 9720
9721 9721 plen = strlen(path) + 1; /* include the terminator */
9722 9722 ori_path = path;
9723 9723 ncomp = 0;
9724 9724
9725 9725 /* count number of components in the path */
9726 9726 for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9727 9727 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9728 9728 if ((slen = nxtc - path) == 0) {
9729 9729 path = nxtc + 1;
9730 9730 continue;
9731 9731 }
9732 9732
9733 9733 if (comp4 != NULL) {
9734 9734 bcopy(path, buf, slen);
9735 9735 buf[slen] = '\0';
9736 9736 (void) str_to_utf8(buf, &comp4[ncomp]);
9737 9737 }
9738 9738
9739 9739 ncomp++; /* 1 valid component */
9740 9740 path = nxtc + 1;
9741 9741 }
9742 9742 if (*nxtc == '\0' || *nxtc == '\n')
9743 9743 break;
9744 9744 }
9745 9745
9746 9746 return (ncomp);
9747 9747 }
9748 9748
9749 9749 /*
9750 9750 * There is a user-level copy of this routine in ref_subr.c.
9751 9751 * Changes should be kept in sync.
9752 9752 */
9753 9753 static int
9754 9754 make_pathname4(char *path, pathname4 *pathname)
9755 9755 {
9756 9756 int ncomp;
9757 9757 component4 *comp4;
9758 9758
9759 9759 if (pathname == NULL)
9760 9760 return (0);
9761 9761
9762 9762 if (path == NULL) {
9763 9763 pathname->pathname4_val = NULL;
9764 9764 pathname->pathname4_len = 0;
9765 9765 return (0);
9766 9766 }
9767 9767
9768 9768 /* count number of components to alloc buffer */
9769 9769 if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
9770 9770 pathname->pathname4_val = NULL;
9771 9771 pathname->pathname4_len = 0;
9772 9772 return (0);
9773 9773 }
9774 9774 comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
9775 9775
9776 9776 /* copy components into allocated buffer */
9777 9777 ncomp = nfs4_create_components(path, comp4);
9778 9778
9779 9779 pathname->pathname4_val = comp4;
9780 9780 pathname->pathname4_len = ncomp;
9781 9781
9782 9782 return (ncomp);
9783 9783 }
9784 9784
9785 9785 #define xdr_fs_locations4 xdr_fattr4_fs_locations
9786 9786
9787 9787 fs_locations4 *
9788 9788 fetch_referral(vnode_t *vp, cred_t *cr)
9789 9789 {
9790 9790 nvlist_t *nvl;
9791 9791 char *stype, *sdata;
9792 9792 fs_locations4 *result;
9793 9793 char buf[1024];
9794 9794 size_t bufsize;
9795 9795 XDR xdr;
9796 9796 int err;
9797 9797
9798 9798 /*
9799 9799 * Check attrs to ensure it's a reparse point
9800 9800 */
9801 9801 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9802 9802 return (NULL);
9803 9803
9804 9804 /*
9805 9805 * Look for an NFS record and get the type and data
9806 9806 */
9807 9807 if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
9808 9808 return (NULL);
9809 9809
9810 9810 /*
9811 9811 * With the type and data, upcall to get the referral
9812 9812 */
9813 9813 bufsize = sizeof (buf);
9814 9814 bzero(buf, sizeof (buf));
9815 9815 err = reparse_kderef((const char *)stype, (const char *)sdata,
9816 9816 buf, &bufsize);
9817 9817 reparse_free(nvl);
9818 9818
9819 9819 DTRACE_PROBE4(nfs4serv__func__referral__upcall,
9820 9820 char *, stype, char *, sdata, char *, buf, int, err);
9821 9821 if (err) {
9822 9822 cmn_err(CE_NOTE,
9823 9823 "reparsed daemon not running: unable to get referral (%d)",
9824 9824 err);
9825 9825 return (NULL);
9826 9826 }
9827 9827
9828 9828 /*
9829 9829 * We get an XDR'ed record back from the kderef call
9830 9830 */
9831 9831 xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
9832 9832 result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
9833 9833 err = xdr_fs_locations4(&xdr, result);
9834 9834 XDR_DESTROY(&xdr);
9835 9835 if (err != TRUE) {
9836 9836 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
9837 9837 int, err);
9838 9838 return (NULL);
9839 9839 }
9840 9840
9841 9841 /*
9842 9842 * Look at path to recover fs_root, ignoring the leading '/'
9843 9843 */
9844 9844 (void) make_pathname4(vp->v_path, &result->fs_root);
9845 9845
9846 9846 return (result);
9847 9847 }
9848 9848
9849 9849 char *
9850 9850 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
9851 9851 {
9852 9852 fs_locations4 *fsl;
9853 9853 fs_location4 *fs;
9854 9854 char *server, *path, *symbuf;
9855 9855 static char *prefix = "/net/";
9856 9856 int i, size, npaths;
9857 9857 uint_t len;
9858 9858
9859 9859 /* Get the referral */
9860 9860 if ((fsl = fetch_referral(vp, cr)) == NULL)
9861 9861 return (NULL);
9862 9862
9863 9863 /* Deal with only the first location and first server */
9864 9864 fs = &fsl->locations_val[0];
9865 9865 server = utf8_to_str(&fs->server_val[0], &len, NULL);
9866 9866 if (server == NULL) {
9867 9867 rfs4_free_fs_locations4(fsl);
9868 9868 kmem_free(fsl, sizeof (fs_locations4));
9869 9869 return (NULL);
9870 9870 }
9871 9871
9872 9872 /* Figure out size for "/net/" + host + /path/path/path + NULL */
9873 9873 size = strlen(prefix) + len;
9874 9874 for (i = 0; i < fs->rootpath.pathname4_len; i++)
9875 9875 size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
9876 9876
9877 9877 /* Allocate the symlink buffer and fill it */
9878 9878 symbuf = kmem_zalloc(size, KM_SLEEP);
9879 9879 (void) strcat(symbuf, prefix);
9880 9880 (void) strcat(symbuf, server);
9881 9881 kmem_free(server, len);
9882 9882
9883 9883 npaths = 0;
9884 9884 for (i = 0; i < fs->rootpath.pathname4_len; i++) {
9885 9885 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
9886 9886 if (path == NULL)
9887 9887 continue;
9888 9888 (void) strcat(symbuf, "/");
9889 9889 (void) strcat(symbuf, path);
9890 9890 npaths++;
9891 9891 kmem_free(path, len);
9892 9892 }
9893 9893
9894 9894 rfs4_free_fs_locations4(fsl);
9895 9895 kmem_free(fsl, sizeof (fs_locations4));
9896 9896
9897 9897 if (strsz != NULL)
9898 9898 *strsz = size;
9899 9899 return (symbuf);
9900 9900 }
9901 9901
9902 9902 /*
9903 9903 * Check to see if we have a downrev Solaris client, so that we
9904 9904 * can send it a symlink instead of a referral.
9905 9905 */
9906 9906 int
9907 9907 client_is_downrev(struct svc_req *req)
9908 9908 {
9909 9909 struct sockaddr *ca;
9910 9910 rfs4_clntip_t *ci;
9911 9911 bool_t create = FALSE;
9912 9912 int is_downrev;
9913 9913
9914 9914 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9915 9915 ASSERT(ca);
9916 9916 ci = rfs4_find_clntip(ca, &create);
9917 9917 if (ci == NULL)
9918 9918 return (0);
9919 9919 is_downrev = ci->ri_no_referrals;
9920 9920 rfs4_dbe_rele(ci->ri_dbe);
9921 9921 return (is_downrev);
9922 9922 }
9923 9923
/*
 * Do the main work of handling HA-NFSv4 Resource Group failover on
 * Sun Cluster.
 * We need to detect whether any RG admin paths have been added or removed,
 * and adjust resources accordingly.
 * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
 * order to scale, the list and array of paths need to be held in more
 * suitable data structures.
 *
 * Inputs are the circular "currently-serving" list nsrv4->dss_pathlist
 * and the rfs4_dss_newpaths[] array of rfs4_dss_numnewpaths entries.
 * Removed paths are unlinked from the list and freed; if any paths were
 * added, a new server instance is created for them, their stable
 * storage state is read, and all grace periods are reset.
 */
static void
hanfsv4_failover(nfs4_srv_t *nsrv4)
{
	int i, start_grace, numadded_paths = 0;
	char **added_paths = NULL;
	rfs4_dss_path_t *dss_path;

	/*
	 * Note: currently, dss_pathlist cannot be NULL, since
	 * it will always include an entry for NFS4_DSS_VAR_DIR. If we
	 * make the latter dynamically specified too, the following will
	 * need to be adjusted.
	 */

	/*
	 * First, look for removed paths: RGs that have been failed-over
	 * away from this node.
	 * Walk the "currently-serving" dss_pathlist and, for each
	 * path, check if it is on the "passed-in" rfs4_dss_newpaths array
	 * from nfsd. If not, that RG path has been removed.
	 *
	 * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
	 * any duplicates.
	 */
	dss_path = nsrv4->dss_pathlist;
	do {
		int found = 0;
		char *path = dss_path->path;

		/*
		 * used only for non-HA so may not be removed
		 * (the "continue" goes straight to the loop condition,
		 * so dss_path must be advanced first)
		 */
		if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
			dss_path = dss_path->next;
			continue;
		}

		for (i = 0; i < rfs4_dss_numnewpaths; i++) {
			int cmpret;
			char *newpath = rfs4_dss_newpaths[i];

			/*
			 * Since nfsd has sorted rfs4_dss_newpaths for us,
			 * once the return from strcmp is negative we know
			 * we've passed the point where "path" should be,
			 * and can stop searching: "path" has been removed.
			 */
			cmpret = strcmp(path, newpath);
			if (cmpret < 0)
				break;
			if (cmpret == 0) {
				found = 1;
				break;
			}
		}

		if (found == 0) {
			/* capture everything needed before the node is freed */
			unsigned index = dss_path->index;
			rfs4_servinst_t *sip = dss_path->sip;
			rfs4_dss_path_t *path_next = dss_path->next;

			/*
			 * This path has been removed.
			 * We must clear out the servinst reference to
			 * it, since it's now owned by another
			 * node: we should not attempt to touch it.
			 */
			ASSERT(dss_path == sip->dss_paths[index]);
			sip->dss_paths[index] = NULL;

			/*
			 * remove from "currently-serving" list, and destroy
			 *
			 * NOTE(review): nsrv4->dss_pathlist is not updated
			 * here; this looks safe only if the list head is
			 * never a removable path (e.g. it is always the
			 * NFS4_DSS_VAR_DIR entry) -- confirm.
			 */
			remque(dss_path);
			/* allow for NUL */
			kmem_free(dss_path->path, strlen(dss_path->path) + 1);
			kmem_free(dss_path, sizeof (rfs4_dss_path_t));

			dss_path = path_next;
		} else {
			/* path was found; not removed */
			dss_path = dss_path->next;
		}
	} while (dss_path != nsrv4->dss_pathlist);

	/*
	 * Now, look for added paths: RGs that have been failed-over
	 * to this node.
	 * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
	 * for each path, check if it is on the "currently-serving"
	 * dss_pathlist. If not, that RG path has been added.
	 *
	 * Note: we don't do duplicate detection here; nfsd does that for us.
	 *
	 * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
	 * an upper bound for the size needed for added_paths[numadded_paths].
	 */

	/* probably more space than we need, but guaranteed to be enough */
	if (rfs4_dss_numnewpaths > 0) {
		size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
		added_paths = kmem_zalloc(sz, KM_SLEEP);
	}

	/* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
	for (i = 0; i < rfs4_dss_numnewpaths; i++) {
		int found = 0;
		char *newpath = rfs4_dss_newpaths[i];

		dss_path = nsrv4->dss_pathlist;
		do {
			char *path = dss_path->path;

			/* used only for non-HA */
			if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
				dss_path = dss_path->next;
				continue;
			}

			/*
			 * NOTE(review): this is a prefix comparison, so a
			 * served path "/a" also matches a new path "/ab";
			 * the removal pass above uses an exact strcmp().
			 * Confirm the difference is intended.
			 */
			if (strncmp(path, newpath, strlen(path)) == 0) {
				found = 1;
				break;
			}

			dss_path = dss_path->next;
		} while (dss_path != nsrv4->dss_pathlist);

		if (found == 0) {
			/* only the pointer is recorded; the string is not copied */
			added_paths[numadded_paths] = newpath;
			numadded_paths++;
		}
	}

	/* did we find any added paths? */
	if (numadded_paths > 0) {

		/* create a new server instance, and start its grace period */
		start_grace = 1;
		/* CSTYLED */
		rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths);

		/* read in the stable storage state from these paths */
		rfs4_dss_readstate(nsrv4, numadded_paths, added_paths);

		/*
		 * Multiple failovers during a grace period will cause
		 * clients of the same resource group to be partitioned
		 * into different server instances, with different
		 * grace periods. Since clients of the same resource
		 * group must be subject to the same grace period,
		 * we need to reset all currently active grace periods.
		 */
		rfs4_grace_reset_all(nsrv4);
	}

	/* release the scratch array with the same size it was allocated with */
	if (rfs4_dss_numnewpaths > 0)
		kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
}
|
↓ open down ↓ |
7368 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX