Print this page
dss_paths[] entries need cleanup too
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/nfs/nfs4_srv.c
+++ new/usr/src/uts/common/fs/nfs/nfs4_srv.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25
26 26 /*
27 27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
28 28 * All Rights Reserved
29 29 */
30 30
31 31 /*
32 32 * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
33 33 * Copyright 2019 Nexenta Systems, Inc.
34 34 * Copyright 2019 Nexenta by DDN, Inc.
35 35 */
36 36
37 37 #include <sys/param.h>
38 38 #include <sys/types.h>
39 39 #include <sys/systm.h>
40 40 #include <sys/cred.h>
41 41 #include <sys/buf.h>
42 42 #include <sys/vfs.h>
43 43 #include <sys/vfs_opreg.h>
44 44 #include <sys/vnode.h>
45 45 #include <sys/uio.h>
46 46 #include <sys/errno.h>
47 47 #include <sys/sysmacros.h>
48 48 #include <sys/statvfs.h>
49 49 #include <sys/kmem.h>
50 50 #include <sys/dirent.h>
51 51 #include <sys/cmn_err.h>
52 52 #include <sys/debug.h>
53 53 #include <sys/systeminfo.h>
54 54 #include <sys/flock.h>
55 55 #include <sys/pathname.h>
56 56 #include <sys/nbmlock.h>
57 57 #include <sys/share.h>
58 58 #include <sys/atomic.h>
59 59 #include <sys/policy.h>
60 60 #include <sys/fem.h>
61 61 #include <sys/sdt.h>
62 62 #include <sys/ddi.h>
63 63 #include <sys/zone.h>
64 64
65 65 #include <fs/fs_reparse.h>
66 66
67 67 #include <rpc/types.h>
68 68 #include <rpc/auth.h>
69 69 #include <rpc/rpcsec_gss.h>
70 70 #include <rpc/svc.h>
71 71
72 72 #include <nfs/nfs.h>
73 73 #include <nfs/nfssys.h>
74 74 #include <nfs/export.h>
75 75 #include <nfs/nfs_cmd.h>
76 76 #include <nfs/lm.h>
77 77 #include <nfs/nfs4.h>
78 78 #include <nfs/nfs4_drc.h>
79 79
80 80 #include <sys/strsubr.h>
81 81 #include <sys/strsun.h>
82 82
83 83 #include <inet/common.h>
84 84 #include <inet/ip.h>
85 85 #include <inet/ip6.h>
86 86
87 87 #include <sys/tsol/label.h>
88 88 #include <sys/tsol/tndb.h>
89 89
90 90 #define RFS4_MAXLOCK_TRIES 4 /* Try to get the lock this many times */
91 91 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
92 92 #define RFS4_LOCK_DELAY 10 /* Milliseconds */
93 93 static clock_t rfs4_lock_delay = RFS4_LOCK_DELAY;
94 94 extern struct svc_ops rdma_svc_ops;
95 95 extern int nfs_loaned_buffers;
96 96 /* End of Tunables */
97 97
98 98 static int rdma_setup_read_data4(READ4args *, READ4res *);
99 99
100 100 /*
101 101 * Used to bump the stateid4.seqid value and show changes in the stateid
102 102 */
103 103 #define next_stateid(sp) (++(sp)->bits.chgseq)
104 104
105 105 /*
106 106 * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
107 107 * This is used to return NFS4ERR_TOOSMALL when clients specify
108 108 * maxcount that isn't large enough to hold the smallest possible
109 109 * XDR encoded dirent.
110 110 *
111 111 * sizeof cookie (8 bytes) +
112 112 * sizeof name_len (4 bytes) +
113 113 * sizeof smallest (padded) name (4 bytes) +
114 114 * sizeof bitmap4_len (12 bytes) + NOTE: we always encode len=2 bm4
115 115 * sizeof attrlist4_len (4 bytes) +
116 116 * sizeof next boolean (4 bytes)
117 117 *
118 118 * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
119 119 * the smallest possible entry4 (assumes no attrs requested).
120 120 * sizeof nfsstat4 (4 bytes) +
121 121 * sizeof verifier4 (8 bytes) +
122 122 * sizeof entry4list bool (4 bytes) +
123 123 * sizeof entry4 (36 bytes) +
124 124 * sizeof eof bool (4 bytes)
125 125 *
126 126 * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
127 127 * VOP_READDIR. Its value is the size of the maximum possible dirent
128 128 * for solaris. The DIRENT64_RECLEN macro returns the size of dirent
129 129 * required for a given name length. MAXNAMELEN is the maximum
130 130 * filename length allowed in Solaris. The first two DIRENT64_RECLEN()
131 131 * macros are to allow for . and .. entries -- just a minor tweak to try
132 132 * and guarantee that buffer we give to VOP_READDIR will be large enough
133 133 * to hold ., .., and the largest possible solaris dirent64.
134 134 */
135 135 #define RFS4_MINLEN_ENTRY4 36
136 136 #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
137 137 #define RFS4_MINLEN_RDDIR_BUF \
138 138 (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
139 139
140 140 /*
141 141 * It would be better to pad to 4 bytes since that's what XDR would do,
142 142 * but the dirents UFS gives us are already padded to 8, so just take
143 143 * what we're given. Dircount is only a hint anyway. Currently the
144 144 * solaris kernel is ASCII only, so there's no point in calling the
145 145 * UTF8 functions.
146 146 *
147 147 * dirent64: name padded to provide 8 byte struct alignment
148 148 * d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
149 149 *
150 150 * cookie: uint64_t + utf8namelen: uint_t + utf8name padded to 8 bytes
151 151 *
152 152 */
153 153 #define DIRENT64_TO_DIRCOUNT(dp) \
154 154 (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
155 155
156 156 zone_key_t rfs4_zone_key;
157 157
158 158 static sysid_t lockt_sysid; /* dummy sysid for all LOCKT calls */
159 159
160 160 u_longlong_t nfs4_srv_caller_id;
161 161 uint_t nfs4_srv_vkey = 0;
162 162
163 163 void rfs4_init_compound_state(struct compound_state *);
164 164
165 165 static void nullfree(caddr_t);
166 166 static void rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
167 167 struct compound_state *);
168 168 static void rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
169 169 struct compound_state *);
170 170 static void rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
171 171 struct compound_state *);
172 172 static void rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
173 173 struct compound_state *);
174 174 static void rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
175 175 struct compound_state *);
176 176 static void rfs4_op_create_free(nfs_resop4 *resop);
177 177 static void rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
178 178 struct svc_req *, struct compound_state *);
179 179 static void rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
180 180 struct svc_req *, struct compound_state *);
181 181 static void rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
182 182 struct compound_state *);
183 183 static void rfs4_op_getattr_free(nfs_resop4 *);
184 184 static void rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
185 185 struct compound_state *);
186 186 static void rfs4_op_getfh_free(nfs_resop4 *);
187 187 static void rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
188 188 struct compound_state *);
189 189 static void rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
190 190 struct compound_state *);
191 191 static void rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
192 192 struct compound_state *);
193 193 static void lock_denied_free(nfs_resop4 *);
194 194 static void rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
195 195 struct compound_state *);
196 196 static void rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
197 197 struct compound_state *);
198 198 static void rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
199 199 struct compound_state *);
200 200 static void rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
201 201 struct compound_state *);
202 202 static void rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
203 203 struct svc_req *req, struct compound_state *cs);
204 204 static void rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
205 205 struct compound_state *);
206 206 static void rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
207 207 struct compound_state *);
208 208 static void rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
209 209 struct svc_req *, struct compound_state *);
210 210 static void rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
211 211 struct svc_req *, struct compound_state *);
212 212 static void rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
213 213 struct compound_state *);
214 214 static void rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
215 215 struct compound_state *);
216 216 static void rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
217 217 struct compound_state *);
218 218 static void rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
219 219 struct compound_state *);
220 220 static void rfs4_op_read_free(nfs_resop4 *);
221 221 static void rfs4_op_readdir_free(nfs_resop4 *resop);
222 222 static void rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
223 223 struct compound_state *);
224 224 static void rfs4_op_readlink_free(nfs_resop4 *);
225 225 static void rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
226 226 struct svc_req *, struct compound_state *);
227 227 static void rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
228 228 struct compound_state *);
229 229 static void rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
230 230 struct compound_state *);
231 231 static void rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
232 232 struct compound_state *);
233 233 static void rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
234 234 struct compound_state *);
235 235 static void rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
236 236 struct compound_state *);
237 237 static void rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
238 238 struct compound_state *);
239 239 static void rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
240 240 struct compound_state *);
241 241 static void rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
242 242 struct compound_state *);
243 243 static void rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
244 244 struct svc_req *, struct compound_state *);
245 245 static void rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
246 246 struct svc_req *req, struct compound_state *);
247 247 static void rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
248 248 struct compound_state *);
249 249 static void rfs4_op_secinfo_free(nfs_resop4 *);
250 250
251 251 static nfsstat4 check_open_access(uint32_t, struct compound_state *,
252 252 struct svc_req *);
253 253 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
254 254 void rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *);
255 255
256 256
257 257 /*
258 258 * translation table for attrs
259 259 */
260 260 struct nfs4_ntov_table {
261 261 union nfs4_attr_u *na;
262 262 uint8_t amap[NFS4_MAXNUM_ATTRS];
263 263 int attrcnt;
264 264 bool_t vfsstat;
265 265 };
266 266
267 267 static void nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
268 268 static void nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
269 269 struct nfs4_svgetit_arg *sargp);
270 270
271 271 static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
272 272 struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
273 273 struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
274 274
275 275 static void hanfsv4_failover(nfs4_srv_t *);
276 276
277 277 fem_t *deleg_rdops;
278 278 fem_t *deleg_wrops;
279 279
280 280 /*
281 281 * NFS4 op dispatch table
282 282 */
283 283
284 284 struct rfsv4disp {
285 285 void (*dis_proc)(); /* proc to call */
286 286 void (*dis_resfree)(); /* frees space allocated by proc */
287 287 int dis_flags; /* RPC_IDEMPOTENT, etc... */
288 288 };
289 289
290 290 static struct rfsv4disp rfsv4disptab[] = {
291 291 /*
292 292 * NFS VERSION 4
293 293 */
294 294
295 295 /* RFS_NULL = 0 */
296 296 {rfs4_op_illegal, nullfree, 0},
297 297
298 298 /* UNUSED = 1 */
299 299 {rfs4_op_illegal, nullfree, 0},
300 300
301 301 /* UNUSED = 2 */
302 302 {rfs4_op_illegal, nullfree, 0},
303 303
304 304 /* OP_ACCESS = 3 */
305 305 {rfs4_op_access, nullfree, RPC_IDEMPOTENT},
306 306
307 307 /* OP_CLOSE = 4 */
308 308 {rfs4_op_close, nullfree, 0},
309 309
310 310 /* OP_COMMIT = 5 */
311 311 {rfs4_op_commit, nullfree, RPC_IDEMPOTENT},
312 312
313 313 /* OP_CREATE = 6 */
314 314 {rfs4_op_create, nullfree, 0},
315 315
316 316 /* OP_DELEGPURGE = 7 */
317 317 {rfs4_op_delegpurge, nullfree, 0},
318 318
319 319 /* OP_DELEGRETURN = 8 */
320 320 {rfs4_op_delegreturn, nullfree, 0},
321 321
322 322 /* OP_GETATTR = 9 */
323 323 {rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},
324 324
325 325 /* OP_GETFH = 10 */
326 326 {rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},
327 327
328 328 /* OP_LINK = 11 */
329 329 {rfs4_op_link, nullfree, 0},
330 330
331 331 /* OP_LOCK = 12 */
332 332 {rfs4_op_lock, lock_denied_free, 0},
333 333
334 334 /* OP_LOCKT = 13 */
335 335 {rfs4_op_lockt, lock_denied_free, 0},
336 336
337 337 /* OP_LOCKU = 14 */
338 338 {rfs4_op_locku, nullfree, 0},
339 339
340 340 /* OP_LOOKUP = 15 */
341 341 {rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
342 342
343 343 /* OP_LOOKUPP = 16 */
344 344 {rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
345 345
346 346 /* OP_NVERIFY = 17 */
347 347 {rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},
348 348
349 349 /* OP_OPEN = 18 */
350 350 {rfs4_op_open, rfs4_free_reply, 0},
351 351
352 352 /* OP_OPENATTR = 19 */
353 353 {rfs4_op_openattr, nullfree, 0},
354 354
355 355 /* OP_OPEN_CONFIRM = 20 */
356 356 {rfs4_op_open_confirm, nullfree, 0},
357 357
358 358 /* OP_OPEN_DOWNGRADE = 21 */
359 359 {rfs4_op_open_downgrade, nullfree, 0},
360 360
361 361 /* OP_PUTFH = 22 */
362 362 {rfs4_op_putfh, nullfree, RPC_ALL},
363 363
364 364 /* OP_PUTPUBFH = 23 */
365 365 {rfs4_op_putpubfh, nullfree, RPC_ALL},
366 366
367 367 /* OP_PUTROOTFH = 24 */
368 368 {rfs4_op_putrootfh, nullfree, RPC_ALL},
369 369
370 370 /* OP_READ = 25 */
371 371 {rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},
372 372
373 373 /* OP_READDIR = 26 */
374 374 {rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},
375 375
376 376 /* OP_READLINK = 27 */
377 377 {rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},
378 378
379 379 /* OP_REMOVE = 28 */
380 380 {rfs4_op_remove, nullfree, 0},
381 381
382 382 /* OP_RENAME = 29 */
383 383 {rfs4_op_rename, nullfree, 0},
384 384
385 385 /* OP_RENEW = 30 */
386 386 {rfs4_op_renew, nullfree, 0},
387 387
388 388 /* OP_RESTOREFH = 31 */
389 389 {rfs4_op_restorefh, nullfree, RPC_ALL},
390 390
391 391 /* OP_SAVEFH = 32 */
392 392 {rfs4_op_savefh, nullfree, RPC_ALL},
393 393
394 394 /* OP_SECINFO = 33 */
395 395 {rfs4_op_secinfo, rfs4_op_secinfo_free, 0},
396 396
397 397 /* OP_SETATTR = 34 */
398 398 {rfs4_op_setattr, nullfree, 0},
399 399
400 400 /* OP_SETCLIENTID = 35 */
401 401 {rfs4_op_setclientid, nullfree, 0},
402 402
403 403 /* OP_SETCLIENTID_CONFIRM = 36 */
404 404 {rfs4_op_setclientid_confirm, nullfree, 0},
405 405
406 406 /* OP_VERIFY = 37 */
407 407 {rfs4_op_verify, nullfree, RPC_IDEMPOTENT},
408 408
409 409 /* OP_WRITE = 38 */
410 410 {rfs4_op_write, nullfree, 0},
411 411
412 412 /* OP_RELEASE_LOCKOWNER = 39 */
413 413 {rfs4_op_release_lockowner, nullfree, 0},
414 414 };
415 415
416 416 static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
417 417
418 418 #define OP_ILLEGAL_IDX (rfsv4disp_cnt)
419 419
420 420 #ifdef DEBUG
421 421
422 422 int rfs4_fillone_debug = 0;
423 423 int rfs4_no_stub_access = 1;
424 424 int rfs4_rddir_debug = 0;
425 425
426 426 static char *rfs4_op_string[] = {
427 427 "rfs4_op_null",
428 428 "rfs4_op_1 unused",
429 429 "rfs4_op_2 unused",
430 430 "rfs4_op_access",
431 431 "rfs4_op_close",
432 432 "rfs4_op_commit",
433 433 "rfs4_op_create",
434 434 "rfs4_op_delegpurge",
435 435 "rfs4_op_delegreturn",
436 436 "rfs4_op_getattr",
437 437 "rfs4_op_getfh",
438 438 "rfs4_op_link",
439 439 "rfs4_op_lock",
440 440 "rfs4_op_lockt",
441 441 "rfs4_op_locku",
442 442 "rfs4_op_lookup",
443 443 "rfs4_op_lookupp",
444 444 "rfs4_op_nverify",
445 445 "rfs4_op_open",
446 446 "rfs4_op_openattr",
447 447 "rfs4_op_open_confirm",
448 448 "rfs4_op_open_downgrade",
449 449 "rfs4_op_putfh",
450 450 "rfs4_op_putpubfh",
451 451 "rfs4_op_putrootfh",
452 452 "rfs4_op_read",
453 453 "rfs4_op_readdir",
454 454 "rfs4_op_readlink",
455 455 "rfs4_op_remove",
456 456 "rfs4_op_rename",
457 457 "rfs4_op_renew",
458 458 "rfs4_op_restorefh",
459 459 "rfs4_op_savefh",
460 460 "rfs4_op_secinfo",
461 461 "rfs4_op_setattr",
462 462 "rfs4_op_setclientid",
463 463 "rfs4_op_setclient_confirm",
464 464 "rfs4_op_verify",
465 465 "rfs4_op_write",
466 466 "rfs4_op_release_lockowner",
467 467 "rfs4_op_illegal"
468 468 };
469 469 #endif
470 470
471 471 void rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *);
472 472
473 473 extern size_t strlcpy(char *dst, const char *src, size_t dstsize);
474 474
475 475 extern void rfs4_free_fs_locations4(fs_locations4 *);
476 476
477 477 #ifdef nextdp
478 478 #undef nextdp
479 479 #endif
480 480 #define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
481 481
482 482 static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
483 483 VOPNAME_OPEN, { .femop_open = deleg_rd_open },
484 484 VOPNAME_WRITE, { .femop_write = deleg_rd_write },
485 485 VOPNAME_SETATTR, { .femop_setattr = deleg_rd_setattr },
486 486 VOPNAME_RWLOCK, { .femop_rwlock = deleg_rd_rwlock },
487 487 VOPNAME_SPACE, { .femop_space = deleg_rd_space },
488 488 VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_rd_setsecattr },
489 489 VOPNAME_VNEVENT, { .femop_vnevent = deleg_rd_vnevent },
490 490 NULL, NULL
491 491 };
492 492 static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
493 493 VOPNAME_OPEN, { .femop_open = deleg_wr_open },
494 494 VOPNAME_READ, { .femop_read = deleg_wr_read },
495 495 VOPNAME_WRITE, { .femop_write = deleg_wr_write },
496 496 VOPNAME_SETATTR, { .femop_setattr = deleg_wr_setattr },
497 497 VOPNAME_RWLOCK, { .femop_rwlock = deleg_wr_rwlock },
498 498 VOPNAME_SPACE, { .femop_space = deleg_wr_space },
499 499 VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_wr_setsecattr },
500 500 VOPNAME_VNEVENT, { .femop_vnevent = deleg_wr_vnevent },
501 501 NULL, NULL
502 502 };
503 503
504 504 /* ARGSUSED */
505 505 static void *
506 506 rfs4_zone_init(zoneid_t zoneid)
507 507 {
508 508 nfs4_srv_t *nsrv4;
509 509 timespec32_t verf;
510 510
511 511 nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP);
512 512
513 513 /*
514 514 * The following algorithm attempts to find a unique verifier
515 515 * to be used as the write verifier returned from the server
516 516 * to the client. It is important that this verifier change
517 517 * whenever the server reboots. Of secondary importance, it
518 518 * is important for the verifier to be unique between two
519 519 * different servers.
520 520 *
521 521 * Thus, an attempt is made to use the system hostid and the
522 522 * current time in seconds when the nfssrv kernel module is
523 523 * loaded. It is assumed that an NFS server will not be able
524 524 * to boot and then to reboot in less than a second. If the
525 525 * hostid has not been set, then the current high resolution
526 526 * time is used. This will ensure different verifiers each
527 527 * time the server reboots and minimize the chances that two
528 528 * different servers will have the same verifier.
529 529 * XXX - this is broken on LP64 kernels.
530 530 */
531 531 verf.tv_sec = (time_t)zone_get_hostid(NULL);
532 532 if (verf.tv_sec != 0) {
533 533 verf.tv_nsec = gethrestime_sec();
534 534 } else {
535 535 timespec_t tverf;
536 536
537 537 gethrestime(&tverf);
538 538 verf.tv_sec = (time_t)tverf.tv_sec;
539 539 verf.tv_nsec = tverf.tv_nsec;
540 540 }
541 541 nsrv4->write4verf = *(uint64_t *)&verf;
542 542
543 543 /* Used to manage create/destroy of server state */
544 544 nsrv4->nfs4_server_state = NULL;
545 545 nsrv4->nfs4_cur_servinst = NULL;
546 546 nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE;
547 547 mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL);
548 548 mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL);
549 549 mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL);
550 550 rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL);
551 551
552 552 return (nsrv4);
553 553 }
554 554
555 555 /* ARGSUSED */
556 556 static void
557 557 rfs4_zone_fini(zoneid_t zoneid, void *data)
558 558 {
559 559 nfs4_srv_t *nsrv4 = data;
560 560
561 561 mutex_destroy(&nsrv4->deleg_lock);
562 562 mutex_destroy(&nsrv4->state_lock);
563 563 mutex_destroy(&nsrv4->servinst_lock);
564 564 rw_destroy(&nsrv4->deleg_policy_lock);
565 565
566 566 kmem_free(nsrv4, sizeof (*nsrv4));
567 567 }
568 568
569 569 void
570 570 rfs4_srvrinit(void)
571 571 {
572 572 extern void rfs4_attr_init();
573 573
574 574 zone_key_create(&rfs4_zone_key, rfs4_zone_init, NULL, rfs4_zone_fini);
575 575
576 576 rfs4_attr_init();
577 577
578 578
579 579 if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) {
580 580 rfs4_disable_delegation();
581 581 } else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
582 582 &deleg_wrops) != 0) {
583 583 rfs4_disable_delegation();
584 584 fem_free(deleg_rdops);
585 585 }
586 586
587 587 nfs4_srv_caller_id = fs_new_caller_id();
588 588 lockt_sysid = lm_alloc_sysidt();
589 589 vsd_create(&nfs4_srv_vkey, NULL);
590 590 rfs4_state_g_init();
591 591 }
592 592
593 593 void
594 594 rfs4_srvrfini(void)
595 595 {
596 596 if (lockt_sysid != LM_NOSYSID) {
597 597 lm_free_sysidt(lockt_sysid);
598 598 lockt_sysid = LM_NOSYSID;
599 599 }
600 600
601 601 rfs4_state_g_fini();
602 602
603 603 fem_free(deleg_rdops);
604 604 fem_free(deleg_wrops);
605 605
606 606 (void) zone_key_delete(rfs4_zone_key);
607 607 }
608 608
609 609 void
610 610 rfs4_do_server_start(int server_upordown,
611 611 int srv_delegation, int cluster_booted)
612 612 {
613 613 nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
614 614
615 615 /* Is this a warm start? */
616 616 if (server_upordown == NFS_SERVER_QUIESCED) {
617 617 cmn_err(CE_NOTE, "nfs4_srv: "
618 618 "server was previously quiesced; "
619 619 "existing NFSv4 state will be re-used");
620 620
621 621 /*
622 622 * HA-NFSv4: this is also the signal
623 623 * that a Resource Group failover has
624 624 * occurred.
625 625 */
626 626 if (cluster_booted)
627 627 hanfsv4_failover(nsrv4);
628 628 } else {
629 629 /* Cold start */
630 630 nsrv4->rfs4_start_time = 0;
631 631 rfs4_state_zone_init(nsrv4);
632 632 nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max,
633 633 nfs4_drc_hash);
634 634
635 635 /*
636 636 * The nfsd service was started with the -s option
637 637 * we need to pull in any state from the paths indicated.
638 638 */
639 639 if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
640 640 /* read in the stable storage state from these paths */
641 641 rfs4_dss_readstate(nsrv4, rfs4_dss_numnewpaths,
642 642 rfs4_dss_newpaths);
643 643 }
644 644 }
645 645
646 646 /* Check if delegation is to be enabled */
647 647 if (srv_delegation != FALSE)
648 648 rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE);
649 649 }
650 650
651 651 void
652 652 rfs4_init_compound_state(struct compound_state *cs)
653 653 {
654 654 bzero(cs, sizeof (*cs));
655 655 cs->cont = TRUE;
656 656 cs->access = CS_ACCESS_DENIED;
657 657 cs->deleg = FALSE;
658 658 cs->mandlock = FALSE;
659 659 cs->fh.nfs_fh4_val = cs->fhbuf;
660 660 }
661 661
662 662 void
663 663 rfs4_grace_start(rfs4_servinst_t *sip)
664 664 {
665 665 rw_enter(&sip->rwlock, RW_WRITER);
666 666 sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
667 667 sip->grace_period = rfs4_grace_period;
668 668 rw_exit(&sip->rwlock);
669 669 }
670 670
671 671 /*
672 672 * returns true if the instance's grace period has never been started
673 673 */
674 674 int
675 675 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
676 676 {
677 677 time_t start_time;
678 678
679 679 rw_enter(&sip->rwlock, RW_READER);
680 680 start_time = sip->start_time;
681 681 rw_exit(&sip->rwlock);
682 682
683 683 return (start_time == 0);
684 684 }
685 685
686 686 /*
687 687 * Indicates if server instance is within the
688 688 * grace period.
689 689 */
690 690 int
691 691 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
692 692 {
693 693 time_t grace_expiry;
694 694
695 695 rw_enter(&sip->rwlock, RW_READER);
696 696 grace_expiry = sip->start_time + sip->grace_period;
697 697 rw_exit(&sip->rwlock);
698 698
699 699 return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
700 700 }
701 701
702 702 int
703 703 rfs4_clnt_in_grace(rfs4_client_t *cp)
704 704 {
705 705 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
706 706
707 707 return (rfs4_servinst_in_grace(cp->rc_server_instance));
708 708 }
709 709
710 710 /*
711 711 * reset all currently active grace periods
712 712 */
713 713 void
714 714 rfs4_grace_reset_all(nfs4_srv_t *nsrv4)
715 715 {
716 716 rfs4_servinst_t *sip;
717 717
718 718 mutex_enter(&nsrv4->servinst_lock);
719 719 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
720 720 if (rfs4_servinst_in_grace(sip))
721 721 rfs4_grace_start(sip);
722 722 mutex_exit(&nsrv4->servinst_lock);
723 723 }
724 724
725 725 /*
726 726 * start any new instances' grace periods
727 727 */
728 728 void
729 729 rfs4_grace_start_new(nfs4_srv_t *nsrv4)
730 730 {
731 731 rfs4_servinst_t *sip;
732 732
733 733 mutex_enter(&nsrv4->servinst_lock);
734 734 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
735 735 if (rfs4_servinst_grace_new(sip))
736 736 rfs4_grace_start(sip);
737 737 mutex_exit(&nsrv4->servinst_lock);
738 738 }
739 739
740 740 static rfs4_dss_path_t *
741 741 rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip,
742 742 char *path, unsigned index)
743 743 {
744 744 size_t len;
745 745 rfs4_dss_path_t *dss_path;
746 746
747 747 dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
748 748
749 749 /*
750 750 * Take a copy of the string, since the original may be overwritten.
751 751 * Sadly, no strdup() in the kernel.
752 752 */
753 753 /* allow for NUL */
754 754 len = strlen(path) + 1;
755 755 dss_path->path = kmem_alloc(len, KM_SLEEP);
756 756 (void) strlcpy(dss_path->path, path, len);
757 757
758 758 /* associate with servinst */
759 759 dss_path->sip = sip;
760 760 dss_path->index = index;
761 761
762 762 /*
763 763 * Add to list of served paths.
764 764 * No locking required, as we're only ever called at startup.
765 765 */
766 766 if (nsrv4->dss_pathlist == NULL) {
767 767 /* this is the first dss_path_t */
768 768
769 769 /* needed for insque/remque */
770 770 dss_path->next = dss_path->prev = dss_path;
771 771
772 772 nsrv4->dss_pathlist = dss_path;
773 773 } else {
774 774 insque(dss_path, nsrv4->dss_pathlist);
775 775 }
776 776
777 777 return (dss_path);
778 778 }
779 779
780 780 /*
781 781 * Create a new server instance, and make it the currently active instance.
782 782 * Note that starting the grace period too early will reduce the clients'
783 783 * recovery window.
784 784 */
785 785 void
786 786 rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace,
787 787 int dss_npaths, char **dss_paths)
788 788 {
789 789 unsigned i;
790 790 rfs4_servinst_t *sip;
791 791 rfs4_oldstate_t *oldstate;
792 792
793 793 sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
794 794 rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
795 795
796 796 sip->start_time = (time_t)0;
797 797 sip->grace_period = (time_t)0;
798 798 sip->next = NULL;
799 799 sip->prev = NULL;
800 800
801 801 rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
802 802 /*
803 803 * This initial dummy entry is required to setup for insque/remque.
804 804 * It must be skipped over whenever the list is traversed.
805 805 */
806 806 oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
807 807 /* insque/remque require initial list entry to be self-terminated */
|
↓ open down ↓ |
807 lines elided |
↑ open up ↑ |
808 808 oldstate->next = oldstate;
809 809 oldstate->prev = oldstate;
810 810 sip->oldstate = oldstate;
811 811
812 812
813 813 sip->dss_npaths = dss_npaths;
814 814 sip->dss_paths = kmem_alloc(dss_npaths *
815 815 sizeof (rfs4_dss_path_t *), KM_SLEEP);
816 816
817 817 for (i = 0; i < dss_npaths; i++) {
818 - /* CSTYLED */
819 - sip->dss_paths[i] = rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
818 + sip->dss_paths[i] =
819 + rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
820 820 }
821 821
822 822 mutex_enter(&nsrv4->servinst_lock);
823 823 if (nsrv4->nfs4_cur_servinst != NULL) {
824 824 /* add to linked list */
825 825 sip->prev = nsrv4->nfs4_cur_servinst;
826 826 nsrv4->nfs4_cur_servinst->next = sip;
827 827 }
828 828 if (start_grace)
829 829 rfs4_grace_start(sip);
830 830 /* make the new instance "current" */
831 831 nsrv4->nfs4_cur_servinst = sip;
832 832
833 833 mutex_exit(&nsrv4->servinst_lock);
834 834 }
835 835
836 836 /*
837 837 * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
838 838 * all instances directly.
839 839 */
840 840 void
841 841 rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4)
842 842 {
843 843 rfs4_servinst_t *sip, *prev, *current;
844 844 #ifdef DEBUG
845 845 int n = 0;
846 846 #endif
|
↓ open down ↓ |
17 lines elided |
↑ open up ↑ |
847 847
848 848 mutex_enter(&nsrv4->servinst_lock);
849 849 ASSERT(nsrv4->nfs4_cur_servinst != NULL);
850 850 current = nsrv4->nfs4_cur_servinst;
851 851 nsrv4->nfs4_cur_servinst = NULL;
852 852 for (sip = current; sip != NULL; sip = prev) {
853 853 prev = sip->prev;
854 854 rw_destroy(&sip->rwlock);
855 855 if (sip->oldstate)
856 856 kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
857 - if (sip->dss_paths)
857 + if (sip->dss_paths) {
858 + int i = sip->dss_npaths;
859 +
860 + while (i > 0) {
861 + i--;
862 + if (sip->dss_paths[i] != NULL) {
863 + char *path = sip->dss_paths[i]->path;
864 +
865 + if (path != NULL) {
866 + kmem_free(path,
867 + strlen(path) + 1);
868 + }
869 + kmem_free(sip->dss_paths[i],
870 + sizeof (rfs4_dss_path_t));
871 + }
872 + }
858 873 kmem_free(sip->dss_paths,
859 874 sip->dss_npaths * sizeof (rfs4_dss_path_t *));
875 + }
860 876 kmem_free(sip, sizeof (rfs4_servinst_t));
861 877 #ifdef DEBUG
862 878 n++;
863 879 #endif
864 880 }
865 881 mutex_exit(&nsrv4->servinst_lock);
866 882 }
867 883
868 884 /*
869 885 * Assign the current server instance to a client_t.
870 886 * Should be called with cp->rc_dbe held.
871 887 */
872 888 void
873 889 rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp,
874 890 rfs4_servinst_t *sip)
875 891 {
876 892 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
877 893
878 894 /*
879 895 * The lock ensures that if the current instance is in the process
880 896 * of changing, we will see the new one.
881 897 */
882 898 mutex_enter(&nsrv4->servinst_lock);
883 899 cp->rc_server_instance = sip;
884 900 mutex_exit(&nsrv4->servinst_lock);
885 901 }
886 902
887 903 rfs4_servinst_t *
888 904 rfs4_servinst(rfs4_client_t *cp)
889 905 {
890 906 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
891 907
892 908 return (cp->rc_server_instance);
893 909 }
894 910
895 911 /* ARGSUSED */
896 912 static void
897 913 nullfree(caddr_t resop)
898 914 {
899 915 }
900 916
901 917 /*
902 918 * This is a fall-through for invalid or not implemented (yet) ops
903 919 */
904 920 /* ARGSUSED */
905 921 static void
906 922 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
907 923 struct compound_state *cs)
908 924 {
909 925 *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
910 926 }
911 927
912 928 /*
913 929 * Check if the security flavor, nfsnum, is in the flavor_list.
914 930 */
915 931 bool_t
916 932 in_flavor_list(int nfsnum, int *flavor_list, int count)
917 933 {
918 934 int i;
919 935
920 936 for (i = 0; i < count; i++) {
921 937 if (nfsnum == flavor_list[i])
922 938 return (TRUE);
923 939 }
924 940 return (FALSE);
925 941 }
926 942
927 943 /*
928 944 * Used by rfs4_op_secinfo to get the security information from the
929 945 * export structure associated with the component.
930 946 */
931 947 /* ARGSUSED */
932 948 static nfsstat4
933 949 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
934 950 {
935 951 int error, different_export = 0;
936 952 vnode_t *dvp, *vp;
937 953 struct exportinfo *exi = NULL;
938 954 fid_t fid;
939 955 uint_t count, i;
940 956 secinfo4 *resok_val;
941 957 struct secinfo *secp;
942 958 seconfig_t *si;
943 959 bool_t did_traverse = FALSE;
944 960 int dotdot, walk;
945 961 nfs_export_t *ne = nfs_get_export();
946 962
947 963 dvp = cs->vp;
948 964 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
949 965
950 966 /*
951 967 * If dotdotting, then need to check whether it's above the
952 968 * root of a filesystem, or above an export point.
953 969 */
954 970 if (dotdot) {
955 971
956 972 /*
957 973 * If dotdotting at the root of a filesystem, then
958 974 * need to traverse back to the mounted-on filesystem
959 975 * and do the dotdot lookup there.
960 976 */
961 977 if ((cs->vp->v_flag & VROOT) || VN_IS_CURZONEROOT(cs->vp)) {
962 978
963 979 /*
964 980 * If at the system root, then can
965 981 * go up no further.
966 982 */
967 983 if (VN_CMP(dvp, ZONE_ROOTVP()))
968 984 return (puterrno4(ENOENT));
969 985
970 986 /*
971 987 * Traverse back to the mounted-on filesystem
972 988 */
973 989 dvp = untraverse(cs->vp);
974 990
975 991 /*
976 992 * Set the different_export flag so we remember
977 993 * to pick up a new exportinfo entry for
978 994 * this new filesystem.
979 995 */
980 996 different_export = 1;
981 997 } else {
982 998
983 999 /*
984 1000 * If dotdotting above an export point then set
985 1001 * the different_export to get new export info.
986 1002 */
987 1003 different_export = nfs_exported(cs->exi, cs->vp);
988 1004 }
989 1005 }
990 1006
991 1007 /*
992 1008 * Get the vnode for the component "nm".
993 1009 */
994 1010 error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
995 1011 NULL, NULL, NULL);
996 1012 if (error)
997 1013 return (puterrno4(error));
998 1014
999 1015 /*
1000 1016 * If the vnode is in a pseudo filesystem, or if the security flavor
1001 1017 * used in the request is valid but not an explicitly shared flavor,
1002 1018 * or the access bit indicates that this is a limited access,
1003 1019 * check whether this vnode is visible.
1004 1020 */
1005 1021 if (!different_export &&
1006 1022 (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
1007 1023 cs->access & CS_ACCESS_LIMITED)) {
1008 1024 if (! nfs_visible(cs->exi, vp, &different_export)) {
1009 1025 VN_RELE(vp);
1010 1026 return (puterrno4(ENOENT));
1011 1027 }
1012 1028 }
1013 1029
1014 1030 /*
1015 1031 * If it's a mountpoint, then traverse it.
1016 1032 */
1017 1033 if (vn_ismntpt(vp)) {
1018 1034 if ((error = traverse(&vp)) != 0) {
1019 1035 VN_RELE(vp);
1020 1036 return (puterrno4(error));
1021 1037 }
1022 1038 /* remember that we had to traverse mountpoint */
1023 1039 did_traverse = TRUE;
1024 1040 different_export = 1;
1025 1041 } else if (vp->v_vfsp != dvp->v_vfsp) {
1026 1042 /*
1027 1043 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
1028 1044 * then vp is probably an LOFS object. We don't need the
1029 1045 * realvp, we just need to know that we might have crossed
1030 1046 * a server fs boundary and need to call checkexport4.
1031 1047 * (LOFS lookup hides server fs mountpoints, and actually calls
1032 1048 * traverse)
1033 1049 */
1034 1050 different_export = 1;
1035 1051 }
1036 1052
1037 1053 /*
1038 1054 * Get the export information for it.
1039 1055 */
1040 1056 if (different_export) {
1041 1057
1042 1058 bzero(&fid, sizeof (fid));
1043 1059 fid.fid_len = MAXFIDSZ;
1044 1060 error = vop_fid_pseudo(vp, &fid);
1045 1061 if (error) {
1046 1062 VN_RELE(vp);
1047 1063 return (puterrno4(error));
1048 1064 }
1049 1065
1050 1066 if (dotdot)
1051 1067 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
1052 1068 else
1053 1069 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
1054 1070
1055 1071 if (exi == NULL) {
1056 1072 if (did_traverse == TRUE) {
1057 1073 /*
1058 1074 * If this vnode is a mounted-on vnode,
1059 1075 * but the mounted-on file system is not
1060 1076 * exported, send back the secinfo for
1061 1077 * the exported node that the mounted-on
1062 1078 * vnode lives in.
1063 1079 */
1064 1080 exi = cs->exi;
1065 1081 } else {
1066 1082 VN_RELE(vp);
1067 1083 return (puterrno4(EACCES));
1068 1084 }
1069 1085 }
1070 1086 } else {
1071 1087 exi = cs->exi;
1072 1088 }
1073 1089 ASSERT(exi != NULL);
1074 1090
1075 1091
1076 1092 /*
1077 1093 * Create the secinfo result based on the security information
1078 1094 * from the exportinfo structure (exi).
1079 1095 *
1080 1096 * Return all flavors for a pseudo node.
1081 1097 * For a real export node, return the flavor that the client
1082 1098 * has access with.
1083 1099 */
1084 1100 ASSERT(RW_LOCK_HELD(&ne->exported_lock));
1085 1101 if (PSEUDO(exi)) {
1086 1102 count = exi->exi_export.ex_seccnt; /* total sec count */
1087 1103 resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1088 1104 secp = exi->exi_export.ex_secinfo;
1089 1105
1090 1106 for (i = 0; i < count; i++) {
1091 1107 si = &secp[i].s_secinfo;
1092 1108 resok_val[i].flavor = si->sc_rpcnum;
1093 1109 if (resok_val[i].flavor == RPCSEC_GSS) {
1094 1110 rpcsec_gss_info *info;
1095 1111
1096 1112 info = &resok_val[i].flavor_info;
1097 1113 info->qop = si->sc_qop;
1098 1114 info->service = (rpc_gss_svc_t)si->sc_service;
1099 1115
1100 1116 /* get oid opaque data */
1101 1117 info->oid.sec_oid4_len =
1102 1118 si->sc_gss_mech_type->length;
1103 1119 info->oid.sec_oid4_val = kmem_alloc(
1104 1120 si->sc_gss_mech_type->length, KM_SLEEP);
1105 1121 bcopy(
1106 1122 si->sc_gss_mech_type->elements,
1107 1123 info->oid.sec_oid4_val,
1108 1124 info->oid.sec_oid4_len);
1109 1125 }
1110 1126 }
1111 1127 resp->SECINFO4resok_len = count;
1112 1128 resp->SECINFO4resok_val = resok_val;
1113 1129 } else {
1114 1130 int ret_cnt = 0, k = 0;
1115 1131 int *flavor_list;
1116 1132
1117 1133 count = exi->exi_export.ex_seccnt; /* total sec count */
1118 1134 secp = exi->exi_export.ex_secinfo;
1119 1135
1120 1136 flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1121 1137 /* find out which flavors to return */
1122 1138 for (i = 0; i < count; i ++) {
1123 1139 int access, flavor, perm;
1124 1140
1125 1141 flavor = secp[i].s_secinfo.sc_nfsnum;
1126 1142 perm = secp[i].s_flags;
1127 1143
1128 1144 access = nfsauth4_secinfo_access(exi, cs->req,
1129 1145 flavor, perm, cs->basecr);
1130 1146
1131 1147 if (! (access & NFSAUTH_DENIED) &&
1132 1148 ! (access & NFSAUTH_WRONGSEC)) {
1133 1149 flavor_list[ret_cnt] = flavor;
1134 1150 ret_cnt++;
1135 1151 }
1136 1152 }
1137 1153
1138 1154 /* Create the returning SECINFO value */
1139 1155 resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1140 1156
1141 1157 for (i = 0; i < count; i++) {
1142 1158 /*
1143 1159 * If the flavor is in the flavor list,
1144 1160 * fill in resok_val.
1145 1161 */
1146 1162 si = &secp[i].s_secinfo;
1147 1163 if (in_flavor_list(si->sc_nfsnum,
1148 1164 flavor_list, ret_cnt)) {
1149 1165 resok_val[k].flavor = si->sc_rpcnum;
1150 1166 if (resok_val[k].flavor == RPCSEC_GSS) {
1151 1167 rpcsec_gss_info *info;
1152 1168
1153 1169 info = &resok_val[k].flavor_info;
1154 1170 info->qop = si->sc_qop;
1155 1171 info->service = (rpc_gss_svc_t)
1156 1172 si->sc_service;
1157 1173
1158 1174 /* get oid opaque data */
1159 1175 info->oid.sec_oid4_len =
1160 1176 si->sc_gss_mech_type->length;
1161 1177 info->oid.sec_oid4_val = kmem_alloc(
1162 1178 si->sc_gss_mech_type->length,
1163 1179 KM_SLEEP);
1164 1180 bcopy(si->sc_gss_mech_type->elements,
1165 1181 info->oid.sec_oid4_val,
1166 1182 info->oid.sec_oid4_len);
1167 1183 }
1168 1184 k++;
1169 1185 }
1170 1186 if (k >= ret_cnt)
1171 1187 break;
1172 1188 }
1173 1189 resp->SECINFO4resok_len = ret_cnt;
1174 1190 resp->SECINFO4resok_val = resok_val;
1175 1191 kmem_free(flavor_list, count * sizeof (int));
1176 1192 }
1177 1193
1178 1194 VN_RELE(vp);
1179 1195 return (NFS4_OK);
1180 1196 }
1181 1197
1182 1198 /*
1183 1199 * SECINFO (Operation 33): Obtain required security information on
1184 1200 * the component name in the format of (security-mechanism-oid, qop, service)
1185 1201 * triplets.
1186 1202 */
1187 1203 /* ARGSUSED */
1188 1204 static void
1189 1205 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1190 1206 struct compound_state *cs)
1191 1207 {
1192 1208 SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1193 1209 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1194 1210 utf8string *utfnm = &args->name;
1195 1211 uint_t len;
1196 1212 char *nm;
1197 1213 struct sockaddr *ca;
1198 1214 char *name = NULL;
1199 1215 nfsstat4 status = NFS4_OK;
1200 1216
1201 1217 DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1202 1218 SECINFO4args *, args);
1203 1219
1204 1220 /*
1205 1221 * Current file handle (cfh) should have been set before getting
1206 1222 * into this function. If not, return error.
1207 1223 */
1208 1224 if (cs->vp == NULL) {
1209 1225 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1210 1226 goto out;
1211 1227 }
1212 1228
1213 1229 if (cs->vp->v_type != VDIR) {
1214 1230 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1215 1231 goto out;
1216 1232 }
1217 1233
1218 1234 /*
1219 1235 * Verify the component name. If failed, error out, but
1220 1236 * do not error out if the component name is a "..".
1221 1237 * SECINFO will return its parents secinfo data for SECINFO "..".
1222 1238 */
1223 1239 status = utf8_dir_verify(utfnm);
1224 1240 if (status != NFS4_OK) {
1225 1241 if (utfnm->utf8string_len != 2 ||
1226 1242 utfnm->utf8string_val[0] != '.' ||
1227 1243 utfnm->utf8string_val[1] != '.') {
1228 1244 *cs->statusp = resp->status = status;
1229 1245 goto out;
1230 1246 }
1231 1247 }
1232 1248
1233 1249 nm = utf8_to_str(utfnm, &len, NULL);
1234 1250 if (nm == NULL) {
1235 1251 *cs->statusp = resp->status = NFS4ERR_INVAL;
1236 1252 goto out;
1237 1253 }
1238 1254
1239 1255 if (len > MAXNAMELEN) {
1240 1256 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1241 1257 kmem_free(nm, len);
1242 1258 goto out;
1243 1259 }
1244 1260
1245 1261 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1246 1262 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1247 1263 MAXPATHLEN + 1);
1248 1264
1249 1265 if (name == NULL) {
1250 1266 *cs->statusp = resp->status = NFS4ERR_INVAL;
1251 1267 kmem_free(nm, len);
1252 1268 goto out;
1253 1269 }
1254 1270
1255 1271
1256 1272 *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1257 1273
1258 1274 if (name != nm)
1259 1275 kmem_free(name, MAXPATHLEN + 1);
1260 1276 kmem_free(nm, len);
1261 1277
1262 1278 out:
1263 1279 DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1264 1280 SECINFO4res *, resp);
1265 1281 }
1266 1282
1267 1283 /*
1268 1284 * Free SECINFO result.
1269 1285 */
1270 1286 /* ARGSUSED */
1271 1287 static void
1272 1288 rfs4_op_secinfo_free(nfs_resop4 *resop)
1273 1289 {
1274 1290 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1275 1291 int count, i;
1276 1292 secinfo4 *resok_val;
1277 1293
1278 1294 /* If this is not an Ok result, nothing to free. */
1279 1295 if (resp->status != NFS4_OK) {
1280 1296 return;
1281 1297 }
1282 1298
1283 1299 count = resp->SECINFO4resok_len;
1284 1300 resok_val = resp->SECINFO4resok_val;
1285 1301
1286 1302 for (i = 0; i < count; i++) {
1287 1303 if (resok_val[i].flavor == RPCSEC_GSS) {
1288 1304 rpcsec_gss_info *info;
1289 1305
1290 1306 info = &resok_val[i].flavor_info;
1291 1307 kmem_free(info->oid.sec_oid4_val,
1292 1308 info->oid.sec_oid4_len);
1293 1309 }
1294 1310 }
1295 1311 kmem_free(resok_val, count * sizeof (secinfo4));
1296 1312 resp->SECINFO4resok_len = 0;
1297 1313 resp->SECINFO4resok_val = NULL;
1298 1314 }
1299 1315
1300 1316 /* ARGSUSED */
1301 1317 static void
1302 1318 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1303 1319 struct compound_state *cs)
1304 1320 {
1305 1321 ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1306 1322 ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1307 1323 int error;
1308 1324 vnode_t *vp;
1309 1325 struct vattr va;
1310 1326 int checkwriteperm;
1311 1327 cred_t *cr = cs->cr;
1312 1328 bslabel_t *clabel, *slabel;
1313 1329 ts_label_t *tslabel;
1314 1330 boolean_t admin_low_client;
1315 1331
1316 1332 DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1317 1333 ACCESS4args *, args);
1318 1334
1319 1335 #if 0 /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1320 1336 if (cs->access == CS_ACCESS_DENIED) {
1321 1337 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1322 1338 goto out;
1323 1339 }
1324 1340 #endif
1325 1341 if (cs->vp == NULL) {
1326 1342 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1327 1343 goto out;
1328 1344 }
1329 1345
1330 1346 ASSERT(cr != NULL);
1331 1347
1332 1348 vp = cs->vp;
1333 1349
1334 1350 /*
1335 1351 * If the file system is exported read only, it is not appropriate
1336 1352 * to check write permissions for regular files and directories.
1337 1353 * Special files are interpreted by the client, so the underlying
1338 1354 * permissions are sent back to the client for interpretation.
1339 1355 */
1340 1356 if (rdonly4(req, cs) &&
1341 1357 (vp->v_type == VREG || vp->v_type == VDIR))
1342 1358 checkwriteperm = 0;
1343 1359 else
1344 1360 checkwriteperm = 1;
1345 1361
1346 1362 /*
1347 1363 * XXX
1348 1364 * We need the mode so that we can correctly determine access
1349 1365 * permissions relative to a mandatory lock file. Access to
1350 1366 * mandatory lock files is denied on the server, so it might
1351 1367 * as well be reflected to the server during the open.
1352 1368 */
1353 1369 va.va_mask = AT_MODE;
1354 1370 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1355 1371 if (error) {
1356 1372 *cs->statusp = resp->status = puterrno4(error);
1357 1373 goto out;
1358 1374 }
1359 1375 resp->access = 0;
1360 1376 resp->supported = 0;
1361 1377
1362 1378 if (is_system_labeled()) {
1363 1379 ASSERT(req->rq_label != NULL);
1364 1380 clabel = req->rq_label;
1365 1381 DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1366 1382 "got client label from request(1)",
1367 1383 struct svc_req *, req);
1368 1384 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1369 1385 if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1370 1386 *cs->statusp = resp->status = puterrno4(EACCES);
1371 1387 goto out;
1372 1388 }
1373 1389 slabel = label2bslabel(tslabel);
1374 1390 DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1375 1391 char *, "got server label(1) for vp(2)",
1376 1392 bslabel_t *, slabel, vnode_t *, vp);
1377 1393
1378 1394 admin_low_client = B_FALSE;
1379 1395 } else
1380 1396 admin_low_client = B_TRUE;
1381 1397 }
1382 1398
1383 1399 if (args->access & ACCESS4_READ) {
1384 1400 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1385 1401 if (!error && !MANDLOCK(vp, va.va_mode) &&
1386 1402 (!is_system_labeled() || admin_low_client ||
1387 1403 bldominates(clabel, slabel)))
1388 1404 resp->access |= ACCESS4_READ;
1389 1405 resp->supported |= ACCESS4_READ;
1390 1406 }
1391 1407 if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1392 1408 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1393 1409 if (!error && (!is_system_labeled() || admin_low_client ||
1394 1410 bldominates(clabel, slabel)))
1395 1411 resp->access |= ACCESS4_LOOKUP;
1396 1412 resp->supported |= ACCESS4_LOOKUP;
1397 1413 }
1398 1414 if (checkwriteperm &&
1399 1415 (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1400 1416 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1401 1417 if (!error && !MANDLOCK(vp, va.va_mode) &&
1402 1418 (!is_system_labeled() || admin_low_client ||
1403 1419 blequal(clabel, slabel)))
1404 1420 resp->access |=
1405 1421 (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1406 1422 resp->supported |=
1407 1423 resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1408 1424 }
1409 1425
1410 1426 if (checkwriteperm &&
1411 1427 (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1412 1428 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1413 1429 if (!error && (!is_system_labeled() || admin_low_client ||
1414 1430 blequal(clabel, slabel)))
1415 1431 resp->access |= ACCESS4_DELETE;
1416 1432 resp->supported |= ACCESS4_DELETE;
1417 1433 }
1418 1434 if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1419 1435 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1420 1436 if (!error && !MANDLOCK(vp, va.va_mode) &&
1421 1437 (!is_system_labeled() || admin_low_client ||
1422 1438 bldominates(clabel, slabel)))
1423 1439 resp->access |= ACCESS4_EXECUTE;
1424 1440 resp->supported |= ACCESS4_EXECUTE;
1425 1441 }
1426 1442
1427 1443 if (is_system_labeled() && !admin_low_client)
1428 1444 label_rele(tslabel);
1429 1445
1430 1446 *cs->statusp = resp->status = NFS4_OK;
1431 1447 out:
1432 1448 DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1433 1449 ACCESS4res *, resp);
1434 1450 }
1435 1451
1436 1452 /* ARGSUSED */
1437 1453 static void
1438 1454 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1439 1455 struct compound_state *cs)
1440 1456 {
1441 1457 COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1442 1458 COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1443 1459 int error;
1444 1460 vnode_t *vp = cs->vp;
1445 1461 cred_t *cr = cs->cr;
1446 1462 vattr_t va;
1447 1463 nfs4_srv_t *nsrv4;
1448 1464
1449 1465 DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1450 1466 COMMIT4args *, args);
1451 1467
1452 1468 if (vp == NULL) {
1453 1469 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1454 1470 goto out;
1455 1471 }
1456 1472 if (cs->access == CS_ACCESS_DENIED) {
1457 1473 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1458 1474 goto out;
1459 1475 }
1460 1476
1461 1477 if (args->offset + args->count < args->offset) {
1462 1478 *cs->statusp = resp->status = NFS4ERR_INVAL;
1463 1479 goto out;
1464 1480 }
1465 1481
1466 1482 va.va_mask = AT_UID;
1467 1483 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1468 1484
1469 1485 /*
1470 1486 * If we can't get the attributes, then we can't do the
1471 1487 * right access checking. So, we'll fail the request.
1472 1488 */
1473 1489 if (error) {
1474 1490 *cs->statusp = resp->status = puterrno4(error);
1475 1491 goto out;
1476 1492 }
1477 1493 if (rdonly4(req, cs)) {
1478 1494 *cs->statusp = resp->status = NFS4ERR_ROFS;
1479 1495 goto out;
1480 1496 }
1481 1497
1482 1498 if (vp->v_type != VREG) {
1483 1499 if (vp->v_type == VDIR)
1484 1500 resp->status = NFS4ERR_ISDIR;
1485 1501 else
1486 1502 resp->status = NFS4ERR_INVAL;
1487 1503 *cs->statusp = resp->status;
1488 1504 goto out;
1489 1505 }
1490 1506
1491 1507 if (crgetuid(cr) != va.va_uid &&
1492 1508 (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1493 1509 *cs->statusp = resp->status = puterrno4(error);
1494 1510 goto out;
1495 1511 }
1496 1512
1497 1513 error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1498 1514
1499 1515 if (error) {
1500 1516 *cs->statusp = resp->status = puterrno4(error);
1501 1517 goto out;
1502 1518 }
1503 1519
1504 1520 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
1505 1521 *cs->statusp = resp->status = NFS4_OK;
1506 1522 resp->writeverf = nsrv4->write4verf;
1507 1523 out:
1508 1524 DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1509 1525 COMMIT4res *, resp);
1510 1526 }
1511 1527
1512 1528 /*
1513 1529 * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1514 1530 * was completed. It does the nfsv4 create for special files.
1515 1531 */
1516 1532 /* ARGSUSED */
1517 1533 static vnode_t *
1518 1534 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1519 1535 struct compound_state *cs, vattr_t *vap, char *nm)
1520 1536 {
1521 1537 int error;
1522 1538 cred_t *cr = cs->cr;
1523 1539 vnode_t *dvp = cs->vp;
1524 1540 vnode_t *vp = NULL;
1525 1541 int mode;
1526 1542 enum vcexcl excl;
1527 1543
1528 1544 switch (args->type) {
1529 1545 case NF4CHR:
1530 1546 case NF4BLK:
1531 1547 if (secpolicy_sys_devices(cr) != 0) {
1532 1548 *cs->statusp = resp->status = NFS4ERR_PERM;
1533 1549 return (NULL);
1534 1550 }
1535 1551 if (args->type == NF4CHR)
1536 1552 vap->va_type = VCHR;
1537 1553 else
1538 1554 vap->va_type = VBLK;
1539 1555 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1540 1556 args->ftype4_u.devdata.specdata2);
1541 1557 vap->va_mask |= AT_RDEV;
1542 1558 break;
1543 1559 case NF4SOCK:
1544 1560 vap->va_type = VSOCK;
1545 1561 break;
1546 1562 case NF4FIFO:
1547 1563 vap->va_type = VFIFO;
1548 1564 break;
1549 1565 default:
1550 1566 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1551 1567 return (NULL);
1552 1568 }
1553 1569
1554 1570 /*
1555 1571 * Must specify the mode.
1556 1572 */
1557 1573 if (!(vap->va_mask & AT_MODE)) {
1558 1574 *cs->statusp = resp->status = NFS4ERR_INVAL;
1559 1575 return (NULL);
1560 1576 }
1561 1577
1562 1578 excl = EXCL;
1563 1579
1564 1580 mode = 0;
1565 1581
1566 1582 error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1567 1583 if (error) {
1568 1584 *cs->statusp = resp->status = puterrno4(error);
1569 1585 return (NULL);
1570 1586 }
1571 1587 return (vp);
1572 1588 }
1573 1589
1574 1590 /*
1575 1591 * nfsv4 create is used to create non-regular files. For regular files,
1576 1592 * use nfsv4 open.
1577 1593 */
1578 1594 /* ARGSUSED */
1579 1595 static void
1580 1596 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1581 1597 struct compound_state *cs)
1582 1598 {
1583 1599 CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1584 1600 CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1585 1601 int error;
1586 1602 struct vattr bva, iva, iva2, ava, *vap;
1587 1603 cred_t *cr = cs->cr;
1588 1604 vnode_t *dvp = cs->vp;
1589 1605 vnode_t *vp = NULL;
1590 1606 vnode_t *realvp;
1591 1607 char *nm, *lnm;
1592 1608 uint_t len, llen;
1593 1609 int syncval = 0;
1594 1610 struct nfs4_svgetit_arg sarg;
1595 1611 struct nfs4_ntov_table ntov;
1596 1612 struct statvfs64 sb;
1597 1613 nfsstat4 status;
1598 1614 struct sockaddr *ca;
1599 1615 char *name = NULL;
1600 1616 char *lname = NULL;
1601 1617
1602 1618 DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1603 1619 CREATE4args *, args);
1604 1620
1605 1621 resp->attrset = 0;
1606 1622
1607 1623 if (dvp == NULL) {
1608 1624 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1609 1625 goto out;
1610 1626 }
1611 1627
1612 1628 /*
1613 1629 * If there is an unshared filesystem mounted on this vnode,
1614 1630 * do not allow to create an object in this directory.
1615 1631 */
1616 1632 if (vn_ismntpt(dvp)) {
1617 1633 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1618 1634 goto out;
1619 1635 }
1620 1636
1621 1637 /* Verify that type is correct */
1622 1638 switch (args->type) {
1623 1639 case NF4LNK:
1624 1640 case NF4BLK:
1625 1641 case NF4CHR:
1626 1642 case NF4SOCK:
1627 1643 case NF4FIFO:
1628 1644 case NF4DIR:
1629 1645 break;
1630 1646 default:
1631 1647 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1632 1648 goto out;
1633 1649 };
1634 1650
1635 1651 if (cs->access == CS_ACCESS_DENIED) {
1636 1652 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1637 1653 goto out;
1638 1654 }
1639 1655 if (dvp->v_type != VDIR) {
1640 1656 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1641 1657 goto out;
1642 1658 }
1643 1659 status = utf8_dir_verify(&args->objname);
1644 1660 if (status != NFS4_OK) {
1645 1661 *cs->statusp = resp->status = status;
1646 1662 goto out;
1647 1663 }
1648 1664
1649 1665 if (rdonly4(req, cs)) {
1650 1666 *cs->statusp = resp->status = NFS4ERR_ROFS;
1651 1667 goto out;
1652 1668 }
1653 1669
1654 1670 /*
1655 1671 * Name of newly created object
1656 1672 */
1657 1673 nm = utf8_to_fn(&args->objname, &len, NULL);
1658 1674 if (nm == NULL) {
1659 1675 *cs->statusp = resp->status = NFS4ERR_INVAL;
1660 1676 goto out;
1661 1677 }
1662 1678
1663 1679 if (len > MAXNAMELEN) {
1664 1680 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1665 1681 kmem_free(nm, len);
1666 1682 goto out;
1667 1683 }
1668 1684
1669 1685 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1670 1686 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1671 1687 MAXPATHLEN + 1);
1672 1688
1673 1689 if (name == NULL) {
1674 1690 *cs->statusp = resp->status = NFS4ERR_INVAL;
1675 1691 kmem_free(nm, len);
1676 1692 goto out;
1677 1693 }
1678 1694
1679 1695 resp->attrset = 0;
1680 1696
1681 1697 sarg.sbp = &sb;
1682 1698 sarg.is_referral = B_FALSE;
1683 1699 nfs4_ntov_table_init(&ntov);
1684 1700
1685 1701 status = do_rfs4_set_attrs(&resp->attrset,
1686 1702 &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
1687 1703
1688 1704 if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1689 1705 status = NFS4ERR_INVAL;
1690 1706
1691 1707 if (status != NFS4_OK) {
1692 1708 *cs->statusp = resp->status = status;
1693 1709 if (name != nm)
1694 1710 kmem_free(name, MAXPATHLEN + 1);
1695 1711 kmem_free(nm, len);
1696 1712 nfs4_ntov_table_free(&ntov, &sarg);
1697 1713 resp->attrset = 0;
1698 1714 goto out;
1699 1715 }
1700 1716
1701 1717 /* Get "before" change value */
1702 1718 bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
1703 1719 error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
1704 1720 if (error) {
1705 1721 *cs->statusp = resp->status = puterrno4(error);
1706 1722 if (name != nm)
1707 1723 kmem_free(name, MAXPATHLEN + 1);
1708 1724 kmem_free(nm, len);
1709 1725 nfs4_ntov_table_free(&ntov, &sarg);
1710 1726 resp->attrset = 0;
1711 1727 goto out;
1712 1728 }
1713 1729 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1714 1730
1715 1731 vap = sarg.vap;
1716 1732
1717 1733 /*
1718 1734 * Set the default initial values for attributes when the parent
1719 1735 * directory does not have the VSUID/VSGID bit set and they have
1720 1736 * not been specified in createattrs.
1721 1737 */
1722 1738 if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
1723 1739 vap->va_uid = crgetuid(cr);
1724 1740 vap->va_mask |= AT_UID;
1725 1741 }
1726 1742 if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
1727 1743 vap->va_gid = crgetgid(cr);
1728 1744 vap->va_mask |= AT_GID;
1729 1745 }
1730 1746
1731 1747 vap->va_mask |= AT_TYPE;
1732 1748 switch (args->type) {
1733 1749 case NF4DIR:
1734 1750 vap->va_type = VDIR;
1735 1751 if ((vap->va_mask & AT_MODE) == 0) {
1736 1752 vap->va_mode = 0700; /* default: owner rwx only */
1737 1753 vap->va_mask |= AT_MODE;
1738 1754 }
1739 1755 error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1740 1756 if (error)
1741 1757 break;
1742 1758
1743 1759 /*
1744 1760 * Get the initial "after" sequence number, if it fails,
1745 1761 * set to zero
1746 1762 */
1747 1763 iva.va_mask = AT_SEQ;
1748 1764 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1749 1765 iva.va_seq = 0;
1750 1766 break;
1751 1767 case NF4LNK:
1752 1768 vap->va_type = VLNK;
1753 1769 if ((vap->va_mask & AT_MODE) == 0) {
1754 1770 vap->va_mode = 0700; /* default: owner rwx only */
1755 1771 vap->va_mask |= AT_MODE;
1756 1772 }
1757 1773
1758 1774 /*
1759 1775 * symlink names must be treated as data
1760 1776 */
1761 1777 lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
1762 1778 &llen, NULL);
1763 1779
1764 1780 if (lnm == NULL) {
1765 1781 *cs->statusp = resp->status = NFS4ERR_INVAL;
1766 1782 if (name != nm)
1767 1783 kmem_free(name, MAXPATHLEN + 1);
1768 1784 kmem_free(nm, len);
1769 1785 nfs4_ntov_table_free(&ntov, &sarg);
1770 1786 resp->attrset = 0;
1771 1787 goto out;
1772 1788 }
1773 1789
1774 1790 if (llen > MAXPATHLEN) {
1775 1791 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1776 1792 if (name != nm)
1777 1793 kmem_free(name, MAXPATHLEN + 1);
1778 1794 kmem_free(nm, len);
1779 1795 kmem_free(lnm, llen);
1780 1796 nfs4_ntov_table_free(&ntov, &sarg);
1781 1797 resp->attrset = 0;
1782 1798 goto out;
1783 1799 }
1784 1800
1785 1801 lname = nfscmd_convname(ca, cs->exi, lnm,
1786 1802 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1787 1803
1788 1804 if (lname == NULL) {
1789 1805 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
1790 1806 if (name != nm)
1791 1807 kmem_free(name, MAXPATHLEN + 1);
1792 1808 kmem_free(nm, len);
1793 1809 kmem_free(lnm, llen);
1794 1810 nfs4_ntov_table_free(&ntov, &sarg);
1795 1811 resp->attrset = 0;
1796 1812 goto out;
1797 1813 }
1798 1814
1799 1815 error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
1800 1816 if (lname != lnm)
1801 1817 kmem_free(lname, MAXPATHLEN + 1);
1802 1818 kmem_free(lnm, llen);
1803 1819 if (error)
1804 1820 break;
1805 1821
1806 1822 /*
1807 1823 * Get the initial "after" sequence number, if it fails,
1808 1824 * set to zero
1809 1825 */
1810 1826 iva.va_mask = AT_SEQ;
1811 1827 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1812 1828 iva.va_seq = 0;
1813 1829
1814 1830 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
1815 1831 NULL, NULL, NULL);
1816 1832 if (error)
1817 1833 break;
1818 1834
1819 1835 /*
1820 1836 * va_seq is not safe over VOP calls, check it again
1821 1837 * if it has changed zero out iva to force atomic = FALSE.
1822 1838 */
1823 1839 iva2.va_mask = AT_SEQ;
1824 1840 if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
1825 1841 iva2.va_seq != iva.va_seq)
1826 1842 iva.va_seq = 0;
1827 1843 break;
1828 1844 default:
1829 1845 /*
1830 1846 * probably a special file.
1831 1847 */
1832 1848 if ((vap->va_mask & AT_MODE) == 0) {
1833 1849 vap->va_mode = 0600; /* default: owner rw only */
1834 1850 vap->va_mask |= AT_MODE;
1835 1851 }
1836 1852 syncval = FNODSYNC;
1837 1853 /*
1838 1854 * We know this will only generate one VOP call
1839 1855 */
1840 1856 vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
1841 1857
1842 1858 if (vp == NULL) {
1843 1859 if (name != nm)
1844 1860 kmem_free(name, MAXPATHLEN + 1);
1845 1861 kmem_free(nm, len);
1846 1862 nfs4_ntov_table_free(&ntov, &sarg);
1847 1863 resp->attrset = 0;
1848 1864 goto out;
1849 1865 }
1850 1866
1851 1867 /*
1852 1868 * Get the initial "after" sequence number, if it fails,
1853 1869 * set to zero
1854 1870 */
1855 1871 iva.va_mask = AT_SEQ;
1856 1872 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1857 1873 iva.va_seq = 0;
1858 1874
1859 1875 break;
1860 1876 }
1861 1877 if (name != nm)
1862 1878 kmem_free(name, MAXPATHLEN + 1);
1863 1879 kmem_free(nm, len);
1864 1880
1865 1881 if (error) {
1866 1882 *cs->statusp = resp->status = puterrno4(error);
1867 1883 }
1868 1884
1869 1885 /*
1870 1886 * Force modified data and metadata out to stable storage.
1871 1887 */
1872 1888 (void) VOP_FSYNC(dvp, 0, cr, NULL);
1873 1889
1874 1890 if (resp->status != NFS4_OK) {
1875 1891 if (vp != NULL)
1876 1892 VN_RELE(vp);
1877 1893 nfs4_ntov_table_free(&ntov, &sarg);
1878 1894 resp->attrset = 0;
1879 1895 goto out;
1880 1896 }
1881 1897
1882 1898 /*
1883 1899 * Finish setup of cinfo response, "before" value already set.
1884 1900 * Get "after" change value, if it fails, simply return the
1885 1901 * before value.
1886 1902 */
1887 1903 ava.va_mask = AT_CTIME|AT_SEQ;
1888 1904 if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
1889 1905 ava.va_ctime = bva.va_ctime;
1890 1906 ava.va_seq = 0;
1891 1907 }
1892 1908 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
1893 1909
1894 1910 /*
1895 1911 * True verification that object was created with correct
1896 1912 * attrs is impossible. The attrs could have been changed
1897 1913 * immediately after object creation. If attributes did
1898 1914 * not verify, the only recourse for the server is to
1899 1915 * destroy the object. Maybe if some attrs (like gid)
1900 1916 * are set incorrectly, the object should be destroyed;
1901 1917 * however, seems bad as a default policy. Do we really
1902 1918 * want to destroy an object over one of the times not
1903 1919 * verifying correctly? For these reasons, the server
1904 1920 * currently sets bits in attrset for createattrs
1905 1921 * that were set; however, no verification is done.
1906 1922 *
1907 1923 * vmask_to_nmask accounts for vattr bits set on create
1908 1924 * [do_rfs4_set_attrs() only sets resp bits for
1909 1925 * non-vattr/vfs bits.]
1910 1926 * Mask off any bits set by default so as not to return
1911 1927 * more attrset bits than were requested in createattrs
1912 1928 */
1913 1929 nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
1914 1930 resp->attrset &= args->createattrs.attrmask;
1915 1931 nfs4_ntov_table_free(&ntov, &sarg);
1916 1932
1917 1933 error = makefh4(&cs->fh, vp, cs->exi);
1918 1934 if (error) {
1919 1935 *cs->statusp = resp->status = puterrno4(error);
1920 1936 }
1921 1937
1922 1938 /*
1923 1939 * The cinfo.atomic = TRUE only if we got no errors, we have
1924 1940 * non-zero va_seq's, and it has incremented by exactly one
1925 1941 * during the creation and it didn't change during the VOP_LOOKUP
1926 1942 * or VOP_FSYNC.
1927 1943 */
1928 1944 if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
1929 1945 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
1930 1946 resp->cinfo.atomic = TRUE;
1931 1947 else
1932 1948 resp->cinfo.atomic = FALSE;
1933 1949
1934 1950 /*
1935 1951 * Force modified metadata out to stable storage.
1936 1952 *
1937 1953 * if a underlying vp exists, pass it to VOP_FSYNC
1938 1954 */
1939 1955 if (VOP_REALVP(vp, &realvp, NULL) == 0)
1940 1956 (void) VOP_FSYNC(realvp, syncval, cr, NULL);
1941 1957 else
1942 1958 (void) VOP_FSYNC(vp, syncval, cr, NULL);
1943 1959
1944 1960 if (resp->status != NFS4_OK) {
1945 1961 VN_RELE(vp);
1946 1962 goto out;
1947 1963 }
1948 1964 if (cs->vp)
1949 1965 VN_RELE(cs->vp);
1950 1966
1951 1967 cs->vp = vp;
1952 1968 *cs->statusp = resp->status = NFS4_OK;
1953 1969 out:
1954 1970 DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
1955 1971 CREATE4res *, resp);
1956 1972 }
1957 1973
1958 1974 /*ARGSUSED*/
1959 1975 static void
1960 1976 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1961 1977 struct compound_state *cs)
1962 1978 {
1963 1979 DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1964 1980 DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1965 1981
1966 1982 rfs4_op_inval(argop, resop, req, cs);
1967 1983
1968 1984 DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1969 1985 DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1970 1986 }
1971 1987
1972 1988 /*ARGSUSED*/
1973 1989 static void
1974 1990 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1975 1991 struct compound_state *cs)
1976 1992 {
1977 1993 DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1978 1994 DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1979 1995 rfs4_deleg_state_t *dsp;
1980 1996 nfsstat4 status;
1981 1997
1982 1998 DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
1983 1999 DELEGRETURN4args *, args);
1984 2000
1985 2001 status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
1986 2002 resp->status = *cs->statusp = status;
1987 2003 if (status != NFS4_OK)
1988 2004 goto out;
1989 2005
1990 2006 /* Ensure specified filehandle matches */
1991 2007 if (cs->vp != dsp->rds_finfo->rf_vp) {
1992 2008 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
1993 2009 } else
1994 2010 rfs4_return_deleg(dsp, FALSE);
1995 2011
1996 2012 rfs4_update_lease(dsp->rds_client);
1997 2013
1998 2014 rfs4_deleg_state_rele(dsp);
1999 2015 out:
2000 2016 DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
2001 2017 DELEGRETURN4res *, resp);
2002 2018 }
2003 2019
2004 2020 /*
2005 2021 * Check to see if a given "flavor" is an explicitly shared flavor.
2006 2022 * The assumption of this routine is the "flavor" is already a valid
2007 2023 * flavor in the secinfo list of "exi".
2008 2024 *
2009 2025 * e.g.
2010 2026 * # share -o sec=flavor1 /export
2011 2027 * # share -o sec=flavor2 /export/home
2012 2028 *
2013 2029 * flavor2 is not an explicitly shared flavor for /export,
2014 2030 * however it is in the secinfo list for /export thru the
2015 2031 * server namespace setup.
2016 2032 */
2017 2033 int
2018 2034 is_exported_sec(int flavor, struct exportinfo *exi)
2019 2035 {
2020 2036 int i;
2021 2037 struct secinfo *sp;
2022 2038
2023 2039 sp = exi->exi_export.ex_secinfo;
2024 2040 for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2025 2041 if (flavor == sp[i].s_secinfo.sc_nfsnum ||
2026 2042 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
2027 2043 return (SEC_REF_EXPORTED(&sp[i]));
2028 2044 }
2029 2045 }
2030 2046
2031 2047 /* Should not reach this point based on the assumption */
2032 2048 return (0);
2033 2049 }
2034 2050
2035 2051 /*
2036 2052 * Check if the security flavor used in the request matches what is
2037 2053 * required at the export point or at the root pseudo node (exi_root).
2038 2054 *
2039 2055 * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
2040 2056 *
2041 2057 */
2042 2058 static int
2043 2059 secinfo_match_or_authnone(struct compound_state *cs)
2044 2060 {
2045 2061 int i;
2046 2062 struct secinfo *sp;
2047 2063
2048 2064 /*
2049 2065 * Check cs->nfsflavor (from the request) against
2050 2066 * the current export data in cs->exi.
2051 2067 */
2052 2068 sp = cs->exi->exi_export.ex_secinfo;
2053 2069 for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
2054 2070 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
2055 2071 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
2056 2072 return (1);
2057 2073 }
2058 2074
2059 2075 return (0);
2060 2076 }
2061 2077
2062 2078 /*
2063 2079 * Check the access authority for the client and return the correct error.
2064 2080 */
2065 2081 nfsstat4
2066 2082 call_checkauth4(struct compound_state *cs, struct svc_req *req)
2067 2083 {
2068 2084 int authres;
2069 2085
2070 2086 /*
2071 2087 * First, check if the security flavor used in the request
2072 2088 * are among the flavors set in the server namespace.
2073 2089 */
2074 2090 if (!secinfo_match_or_authnone(cs)) {
2075 2091 *cs->statusp = NFS4ERR_WRONGSEC;
2076 2092 return (*cs->statusp);
2077 2093 }
2078 2094
2079 2095 authres = checkauth4(cs, req);
2080 2096
2081 2097 if (authres > 0) {
2082 2098 *cs->statusp = NFS4_OK;
2083 2099 if (! (cs->access & CS_ACCESS_LIMITED))
2084 2100 cs->access = CS_ACCESS_OK;
2085 2101 } else if (authres == 0) {
2086 2102 *cs->statusp = NFS4ERR_ACCESS;
2087 2103 } else if (authres == -2) {
2088 2104 *cs->statusp = NFS4ERR_WRONGSEC;
2089 2105 } else {
2090 2106 *cs->statusp = NFS4ERR_DELAY;
2091 2107 }
2092 2108 return (*cs->statusp);
2093 2109 }
2094 2110
2095 2111 /*
2096 2112 * bitmap4_to_attrmask is called by getattr and readdir.
2097 2113 * It sets up the vattr mask and determines whether vfsstat call is needed
2098 2114 * based on the input bitmap.
2099 2115 * Returns nfsv4 status.
2100 2116 */
2101 2117 static nfsstat4
2102 2118 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2103 2119 {
2104 2120 int i;
2105 2121 uint_t va_mask;
2106 2122 struct statvfs64 *sbp = sargp->sbp;
2107 2123
2108 2124 sargp->sbp = NULL;
2109 2125 sargp->flag = 0;
2110 2126 sargp->rdattr_error = NFS4_OK;
2111 2127 sargp->mntdfid_set = FALSE;
2112 2128 if (sargp->cs->vp)
2113 2129 sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2114 2130 FH4_ATTRDIR | FH4_NAMEDATTR);
2115 2131 else
2116 2132 sargp->xattr = 0;
2117 2133
2118 2134 /*
2119 2135 * Set rdattr_error_req to true if return error per
2120 2136 * failed entry rather than fail the readdir.
2121 2137 */
2122 2138 if (breq & FATTR4_RDATTR_ERROR_MASK)
2123 2139 sargp->rdattr_error_req = 1;
2124 2140 else
2125 2141 sargp->rdattr_error_req = 0;
2126 2142
2127 2143 /*
2128 2144 * generate the va_mask
2129 2145 * Handle the easy cases first
2130 2146 */
2131 2147 switch (breq) {
2132 2148 case NFS4_NTOV_ATTR_MASK:
2133 2149 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2134 2150 return (NFS4_OK);
2135 2151
2136 2152 case NFS4_FS_ATTR_MASK:
2137 2153 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2138 2154 sargp->sbp = sbp;
2139 2155 return (NFS4_OK);
2140 2156
2141 2157 case NFS4_NTOV_ATTR_CACHE_MASK:
2142 2158 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2143 2159 return (NFS4_OK);
2144 2160
2145 2161 case FATTR4_LEASE_TIME_MASK:
2146 2162 sargp->vap->va_mask = 0;
2147 2163 return (NFS4_OK);
2148 2164
2149 2165 default:
2150 2166 va_mask = 0;
2151 2167 for (i = 0; i < nfs4_ntov_map_size; i++) {
2152 2168 if ((breq & nfs4_ntov_map[i].fbit) &&
2153 2169 nfs4_ntov_map[i].vbit)
2154 2170 va_mask |= nfs4_ntov_map[i].vbit;
2155 2171 }
2156 2172
2157 2173 /*
2158 2174 * Check is vfsstat is needed
2159 2175 */
2160 2176 if (breq & NFS4_FS_ATTR_MASK)
2161 2177 sargp->sbp = sbp;
2162 2178
2163 2179 sargp->vap->va_mask = va_mask;
2164 2180 return (NFS4_OK);
2165 2181 }
2166 2182 /* NOTREACHED */
2167 2183 }
2168 2184
2169 2185 /*
2170 2186 * bitmap4_get_sysattrs is called by getattr and readdir.
2171 2187 * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2172 2188 * Returns nfsv4 status.
2173 2189 */
2174 2190 static nfsstat4
2175 2191 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2176 2192 {
2177 2193 int error;
2178 2194 struct compound_state *cs = sargp->cs;
2179 2195 vnode_t *vp = cs->vp;
2180 2196
2181 2197 if (sargp->sbp != NULL) {
2182 2198 if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2183 2199 sargp->sbp = NULL; /* to identify error */
2184 2200 return (puterrno4(error));
2185 2201 }
2186 2202 }
2187 2203
2188 2204 return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2189 2205 }
2190 2206
2191 2207 static void
2192 2208 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2193 2209 {
2194 2210 ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2195 2211 KM_SLEEP);
2196 2212 ntovp->attrcnt = 0;
2197 2213 ntovp->vfsstat = FALSE;
2198 2214 }
2199 2215
2200 2216 static void
2201 2217 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2202 2218 struct nfs4_svgetit_arg *sargp)
2203 2219 {
2204 2220 int i;
2205 2221 union nfs4_attr_u *na;
2206 2222 uint8_t *amap;
2207 2223
2208 2224 /*
2209 2225 * XXX Should do the same checks for whether the bit is set
2210 2226 */
2211 2227 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2212 2228 i < ntovp->attrcnt; i++, na++, amap++) {
2213 2229 (void) (*nfs4_ntov_map[*amap].sv_getit)(
2214 2230 NFS4ATTR_FREEIT, sargp, na);
2215 2231 }
2216 2232 if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2217 2233 /*
2218 2234 * xdr_free for getattr will be done later
2219 2235 */
2220 2236 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2221 2237 i < ntovp->attrcnt; i++, na++, amap++) {
2222 2238 xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2223 2239 }
2224 2240 }
2225 2241 kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2226 2242 }
2227 2243
2228 2244 /*
2229 2245 * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2230 2246 */
2231 2247 static nfsstat4
2232 2248 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2233 2249 struct nfs4_svgetit_arg *sargp)
2234 2250 {
2235 2251 int error = 0;
2236 2252 int i, k;
2237 2253 struct nfs4_ntov_table ntov;
2238 2254 XDR xdr;
2239 2255 ulong_t xdr_size;
2240 2256 char *xdr_attrs;
2241 2257 nfsstat4 status = NFS4_OK;
2242 2258 nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2243 2259 union nfs4_attr_u *na;
2244 2260 uint8_t *amap;
2245 2261
2246 2262 sargp->op = NFS4ATTR_GETIT;
2247 2263 sargp->flag = 0;
2248 2264
2249 2265 fattrp->attrmask = 0;
2250 2266 /* if no bits requested, then return empty fattr4 */
2251 2267 if (breq == 0) {
2252 2268 fattrp->attrlist4_len = 0;
2253 2269 fattrp->attrlist4 = NULL;
2254 2270 return (NFS4_OK);
2255 2271 }
2256 2272
2257 2273 /*
2258 2274 * return NFS4ERR_INVAL when client requests write-only attrs
2259 2275 */
2260 2276 if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2261 2277 return (NFS4ERR_INVAL);
2262 2278
2263 2279 nfs4_ntov_table_init(&ntov);
2264 2280 na = ntov.na;
2265 2281 amap = ntov.amap;
2266 2282
2267 2283 /*
2268 2284 * Now loop to get or verify the attrs
2269 2285 */
2270 2286 for (i = 0; i < nfs4_ntov_map_size; i++) {
2271 2287 if (breq & nfs4_ntov_map[i].fbit) {
2272 2288 if ((*nfs4_ntov_map[i].sv_getit)(
2273 2289 NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2274 2290
2275 2291 error = (*nfs4_ntov_map[i].sv_getit)(
2276 2292 NFS4ATTR_GETIT, sargp, na);
2277 2293
2278 2294 /*
2279 2295 * Possible error values:
2280 2296 * >0 if sv_getit failed to
2281 2297 * get the attr; 0 if succeeded;
2282 2298 * <0 if rdattr_error and the
2283 2299 * attribute cannot be returned.
2284 2300 */
2285 2301 if (error && !(sargp->rdattr_error_req))
2286 2302 goto done;
2287 2303 /*
2288 2304 * If error then just for entry
2289 2305 */
2290 2306 if (error == 0) {
2291 2307 fattrp->attrmask |=
2292 2308 nfs4_ntov_map[i].fbit;
2293 2309 *amap++ =
2294 2310 (uint8_t)nfs4_ntov_map[i].nval;
2295 2311 na++;
2296 2312 (ntov.attrcnt)++;
2297 2313 } else if ((error > 0) &&
2298 2314 (sargp->rdattr_error == NFS4_OK)) {
2299 2315 sargp->rdattr_error = puterrno4(error);
2300 2316 }
2301 2317 error = 0;
2302 2318 }
2303 2319 }
2304 2320 }
2305 2321
2306 2322 /*
2307 2323 * If rdattr_error was set after the return value for it was assigned,
2308 2324 * update it.
2309 2325 */
2310 2326 if (prev_rdattr_error != sargp->rdattr_error) {
2311 2327 na = ntov.na;
2312 2328 amap = ntov.amap;
2313 2329 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2314 2330 k = *amap;
2315 2331 if (k < FATTR4_RDATTR_ERROR) {
2316 2332 continue;
2317 2333 }
2318 2334 if ((k == FATTR4_RDATTR_ERROR) &&
2319 2335 ((*nfs4_ntov_map[k].sv_getit)(
2320 2336 NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2321 2337
2322 2338 (void) (*nfs4_ntov_map[k].sv_getit)(
2323 2339 NFS4ATTR_GETIT, sargp, na);
2324 2340 }
2325 2341 break;
2326 2342 }
2327 2343 }
2328 2344
2329 2345 xdr_size = 0;
2330 2346 na = ntov.na;
2331 2347 amap = ntov.amap;
2332 2348 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2333 2349 xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2334 2350 }
2335 2351
2336 2352 fattrp->attrlist4_len = xdr_size;
2337 2353 if (xdr_size) {
2338 2354 /* freed by rfs4_op_getattr_free() */
2339 2355 fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2340 2356
2341 2357 xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2342 2358
2343 2359 na = ntov.na;
2344 2360 amap = ntov.amap;
2345 2361 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2346 2362 if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2347 2363 DTRACE_PROBE1(nfss__e__getattr4_encfail,
2348 2364 int, *amap);
2349 2365 status = NFS4ERR_SERVERFAULT;
2350 2366 break;
2351 2367 }
2352 2368 }
2353 2369 /* xdrmem_destroy(&xdrs); */ /* NO-OP */
2354 2370 } else {
2355 2371 fattrp->attrlist4 = NULL;
2356 2372 }
2357 2373 done:
2358 2374
2359 2375 nfs4_ntov_table_free(&ntov, sargp);
2360 2376
2361 2377 if (error != 0)
2362 2378 status = puterrno4(error);
2363 2379
2364 2380 return (status);
2365 2381 }
2366 2382
2367 2383 /* ARGSUSED */
2368 2384 static void
2369 2385 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2370 2386 struct compound_state *cs)
2371 2387 {
2372 2388 GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2373 2389 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2374 2390 struct nfs4_svgetit_arg sarg;
2375 2391 struct statvfs64 sb;
2376 2392 nfsstat4 status;
2377 2393
2378 2394 DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2379 2395 GETATTR4args *, args);
2380 2396
2381 2397 if (cs->vp == NULL) {
2382 2398 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2383 2399 goto out;
2384 2400 }
2385 2401
2386 2402 if (cs->access == CS_ACCESS_DENIED) {
2387 2403 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2388 2404 goto out;
2389 2405 }
2390 2406
2391 2407 sarg.sbp = &sb;
2392 2408 sarg.cs = cs;
2393 2409 sarg.is_referral = B_FALSE;
2394 2410
2395 2411 status = bitmap4_to_attrmask(args->attr_request, &sarg);
2396 2412 if (status == NFS4_OK) {
2397 2413
2398 2414 status = bitmap4_get_sysattrs(&sarg);
2399 2415 if (status == NFS4_OK) {
2400 2416
2401 2417 /* Is this a referral? */
2402 2418 if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2403 2419 /* Older V4 Solaris client sees a link */
2404 2420 if (client_is_downrev(req))
2405 2421 sarg.vap->va_type = VLNK;
2406 2422 else
2407 2423 sarg.is_referral = B_TRUE;
2408 2424 }
2409 2425
2410 2426 status = do_rfs4_op_getattr(args->attr_request,
2411 2427 &resp->obj_attributes, &sarg);
2412 2428 }
2413 2429 }
2414 2430 *cs->statusp = resp->status = status;
2415 2431 out:
2416 2432 DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2417 2433 GETATTR4res *, resp);
2418 2434 }
2419 2435
2420 2436 static void
2421 2437 rfs4_op_getattr_free(nfs_resop4 *resop)
2422 2438 {
2423 2439 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2424 2440
2425 2441 nfs4_fattr4_free(&resp->obj_attributes);
2426 2442 }
2427 2443
2428 2444 /* ARGSUSED */
2429 2445 static void
2430 2446 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2431 2447 struct compound_state *cs)
2432 2448 {
2433 2449 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2434 2450
2435 2451 DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2436 2452
2437 2453 if (cs->vp == NULL) {
2438 2454 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2439 2455 goto out;
2440 2456 }
2441 2457 if (cs->access == CS_ACCESS_DENIED) {
2442 2458 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2443 2459 goto out;
2444 2460 }
2445 2461
2446 2462 /* check for reparse point at the share point */
2447 2463 if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2448 2464 /* it's all bad */
2449 2465 cs->exi->exi_moved = 1;
2450 2466 *cs->statusp = resp->status = NFS4ERR_MOVED;
2451 2467 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2452 2468 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2453 2469 return;
2454 2470 }
2455 2471
2456 2472 /* check for reparse point at vp */
2457 2473 if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2458 2474 /* it's not all bad */
2459 2475 *cs->statusp = resp->status = NFS4ERR_MOVED;
2460 2476 DTRACE_PROBE2(nfs4serv__func__referral__moved,
2461 2477 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2462 2478 return;
2463 2479 }
2464 2480
2465 2481 resp->object.nfs_fh4_val =
2466 2482 kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2467 2483 nfs_fh4_copy(&cs->fh, &resp->object);
2468 2484 *cs->statusp = resp->status = NFS4_OK;
2469 2485 out:
2470 2486 DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2471 2487 GETFH4res *, resp);
2472 2488 }
2473 2489
2474 2490 static void
2475 2491 rfs4_op_getfh_free(nfs_resop4 *resop)
2476 2492 {
2477 2493 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2478 2494
2479 2495 if (resp->status == NFS4_OK &&
2480 2496 resp->object.nfs_fh4_val != NULL) {
2481 2497 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2482 2498 resp->object.nfs_fh4_val = NULL;
2483 2499 resp->object.nfs_fh4_len = 0;
2484 2500 }
2485 2501 }
2486 2502
2487 2503 /*
2488 2504 * illegal: args: void
2489 2505 * res : status (NFS4ERR_OP_ILLEGAL)
2490 2506 */
2491 2507 /* ARGSUSED */
2492 2508 static void
2493 2509 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2494 2510 struct svc_req *req, struct compound_state *cs)
2495 2511 {
2496 2512 ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2497 2513
2498 2514 resop->resop = OP_ILLEGAL;
2499 2515 *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2500 2516 }
2501 2517
2502 2518 /*
2503 2519 * link: args: SAVED_FH: file, CURRENT_FH: target directory
2504 2520 * res: status. If success - CURRENT_FH unchanged, return change_info
2505 2521 */
2506 2522 /* ARGSUSED */
2507 2523 static void
2508 2524 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2509 2525 struct compound_state *cs)
2510 2526 {
2511 2527 LINK4args *args = &argop->nfs_argop4_u.oplink;
2512 2528 LINK4res *resp = &resop->nfs_resop4_u.oplink;
2513 2529 int error;
2514 2530 vnode_t *vp;
2515 2531 vnode_t *dvp;
2516 2532 struct vattr bdva, idva, adva;
2517 2533 char *nm;
2518 2534 uint_t len;
2519 2535 struct sockaddr *ca;
2520 2536 char *name = NULL;
2521 2537 nfsstat4 status;
2522 2538
2523 2539 DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2524 2540 LINK4args *, args);
2525 2541
2526 2542 /* SAVED_FH: source object */
2527 2543 vp = cs->saved_vp;
2528 2544 if (vp == NULL) {
2529 2545 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2530 2546 goto out;
2531 2547 }
2532 2548
2533 2549 /* CURRENT_FH: target directory */
2534 2550 dvp = cs->vp;
2535 2551 if (dvp == NULL) {
2536 2552 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2537 2553 goto out;
2538 2554 }
2539 2555
2540 2556 /*
2541 2557 * If there is a non-shared filesystem mounted on this vnode,
2542 2558 * do not allow to link any file in this directory.
2543 2559 */
2544 2560 if (vn_ismntpt(dvp)) {
2545 2561 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2546 2562 goto out;
2547 2563 }
2548 2564
2549 2565 if (cs->access == CS_ACCESS_DENIED) {
2550 2566 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2551 2567 goto out;
2552 2568 }
2553 2569
2554 2570 /* Check source object's type validity */
2555 2571 if (vp->v_type == VDIR) {
2556 2572 *cs->statusp = resp->status = NFS4ERR_ISDIR;
2557 2573 goto out;
2558 2574 }
2559 2575
2560 2576 /* Check target directory's type */
2561 2577 if (dvp->v_type != VDIR) {
2562 2578 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2563 2579 goto out;
2564 2580 }
2565 2581
2566 2582 if (cs->saved_exi != cs->exi) {
2567 2583 *cs->statusp = resp->status = NFS4ERR_XDEV;
2568 2584 goto out;
2569 2585 }
2570 2586
2571 2587 status = utf8_dir_verify(&args->newname);
2572 2588 if (status != NFS4_OK) {
2573 2589 *cs->statusp = resp->status = status;
2574 2590 goto out;
2575 2591 }
2576 2592
2577 2593 nm = utf8_to_fn(&args->newname, &len, NULL);
2578 2594 if (nm == NULL) {
2579 2595 *cs->statusp = resp->status = NFS4ERR_INVAL;
2580 2596 goto out;
2581 2597 }
2582 2598
2583 2599 if (len > MAXNAMELEN) {
2584 2600 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2585 2601 kmem_free(nm, len);
2586 2602 goto out;
2587 2603 }
2588 2604
2589 2605 if (rdonly4(req, cs)) {
2590 2606 *cs->statusp = resp->status = NFS4ERR_ROFS;
2591 2607 kmem_free(nm, len);
2592 2608 goto out;
2593 2609 }
2594 2610
2595 2611 /* Get "before" change value */
2596 2612 bdva.va_mask = AT_CTIME|AT_SEQ;
2597 2613 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2598 2614 if (error) {
2599 2615 *cs->statusp = resp->status = puterrno4(error);
2600 2616 kmem_free(nm, len);
2601 2617 goto out;
2602 2618 }
2603 2619
2604 2620 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2605 2621 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2606 2622 MAXPATHLEN + 1);
2607 2623
2608 2624 if (name == NULL) {
2609 2625 *cs->statusp = resp->status = NFS4ERR_INVAL;
2610 2626 kmem_free(nm, len);
2611 2627 goto out;
2612 2628 }
2613 2629
2614 2630 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2615 2631
2616 2632 error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2617 2633
2618 2634 if (nm != name)
2619 2635 kmem_free(name, MAXPATHLEN + 1);
2620 2636 kmem_free(nm, len);
2621 2637
2622 2638 /*
2623 2639 * Get the initial "after" sequence number, if it fails, set to zero
2624 2640 */
2625 2641 idva.va_mask = AT_SEQ;
2626 2642 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2627 2643 idva.va_seq = 0;
2628 2644
2629 2645 /*
2630 2646 * Force modified data and metadata out to stable storage.
2631 2647 */
2632 2648 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2633 2649 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2634 2650
2635 2651 if (error) {
2636 2652 *cs->statusp = resp->status = puterrno4(error);
2637 2653 goto out;
2638 2654 }
2639 2655
2640 2656 /*
2641 2657 * Get "after" change value, if it fails, simply return the
2642 2658 * before value.
2643 2659 */
2644 2660 adva.va_mask = AT_CTIME|AT_SEQ;
2645 2661 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2646 2662 adva.va_ctime = bdva.va_ctime;
2647 2663 adva.va_seq = 0;
2648 2664 }
2649 2665
2650 2666 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2651 2667
2652 2668 /*
2653 2669 * The cinfo.atomic = TRUE only if we have
2654 2670 * non-zero va_seq's, and it has incremented by exactly one
2655 2671 * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2656 2672 */
2657 2673 if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2658 2674 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2659 2675 resp->cinfo.atomic = TRUE;
2660 2676 else
2661 2677 resp->cinfo.atomic = FALSE;
2662 2678
2663 2679 *cs->statusp = resp->status = NFS4_OK;
2664 2680 out:
2665 2681 DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2666 2682 LINK4res *, resp);
2667 2683 }
2668 2684
2669 2685 /*
2670 2686 * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2671 2687 */
2672 2688
2673 2689 /* ARGSUSED */
2674 2690 static nfsstat4
2675 2691 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2676 2692 {
2677 2693 int error;
2678 2694 int different_export = 0;
2679 2695 vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2680 2696 struct exportinfo *exi = NULL, *pre_exi = NULL;
2681 2697 nfsstat4 stat;
2682 2698 fid_t fid;
2683 2699 int attrdir, dotdot, walk;
2684 2700 bool_t is_newvp = FALSE;
2685 2701
2686 2702 if (cs->vp->v_flag & V_XATTRDIR) {
2687 2703 attrdir = 1;
2688 2704 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2689 2705 } else {
2690 2706 attrdir = 0;
2691 2707 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2692 2708 }
2693 2709
2694 2710 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2695 2711
2696 2712 /*
2697 2713 * If dotdotting, then need to check whether it's
2698 2714 * above the root of a filesystem, or above an
2699 2715 * export point.
2700 2716 */
2701 2717 if (dotdot) {
2702 2718
2703 2719 /*
2704 2720 * If dotdotting at the root of a filesystem, then
2705 2721 * need to traverse back to the mounted-on filesystem
2706 2722 * and do the dotdot lookup there.
2707 2723 */
2708 2724 if ((cs->vp->v_flag & VROOT) || VN_IS_CURZONEROOT(cs->vp)) {
2709 2725
2710 2726 /*
2711 2727 * If at the system root, then can
2712 2728 * go up no further.
2713 2729 */
2714 2730 if (VN_CMP(cs->vp, ZONE_ROOTVP()))
2715 2731 return (puterrno4(ENOENT));
2716 2732
2717 2733 /*
2718 2734 * Traverse back to the mounted-on filesystem
2719 2735 */
2720 2736 cs->vp = untraverse(cs->vp);
2721 2737
2722 2738 /*
2723 2739 * Set the different_export flag so we remember
2724 2740 * to pick up a new exportinfo entry for
2725 2741 * this new filesystem.
2726 2742 */
2727 2743 different_export = 1;
2728 2744 } else {
2729 2745
2730 2746 /*
2731 2747 * If dotdotting above an export point then set
2732 2748 * the different_export to get new export info.
2733 2749 */
2734 2750 different_export = nfs_exported(cs->exi, cs->vp);
2735 2751 }
2736 2752 }
2737 2753
2738 2754 error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2739 2755 NULL, NULL, NULL);
2740 2756 if (error)
2741 2757 return (puterrno4(error));
2742 2758
2743 2759 /*
2744 2760 * If the vnode is in a pseudo filesystem, check whether it is visible.
2745 2761 *
2746 2762 * XXX if the vnode is a symlink and it is not visible in
2747 2763 * a pseudo filesystem, return ENOENT (not following symlink).
2748 2764 * V4 client can not mount such symlink. This is a regression
2749 2765 * from V2/V3.
2750 2766 *
2751 2767 * In the same exported filesystem, if the security flavor used
2752 2768 * is not an explicitly shared flavor, limit the view to the visible
2753 2769 * list entries only. This is not a WRONGSEC case because it's already
2754 2770 * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2755 2771 */
2756 2772 if (!different_export &&
2757 2773 (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2758 2774 cs->access & CS_ACCESS_LIMITED)) {
2759 2775 if (! nfs_visible(cs->exi, vp, &different_export)) {
2760 2776 VN_RELE(vp);
2761 2777 return (puterrno4(ENOENT));
2762 2778 }
2763 2779 }
2764 2780
2765 2781 /*
2766 2782 * If it's a mountpoint, then traverse it.
2767 2783 */
2768 2784 if (vn_ismntpt(vp)) {
2769 2785 pre_exi = cs->exi; /* save pre-traversed exportinfo */
2770 2786 pre_tvp = vp; /* save pre-traversed vnode */
2771 2787
2772 2788 /*
2773 2789 * hold pre_tvp to counteract rele by traverse. We will
2774 2790 * need pre_tvp below if checkexport4 fails
2775 2791 */
2776 2792 VN_HOLD(pre_tvp);
2777 2793 if ((error = traverse(&vp)) != 0) {
2778 2794 VN_RELE(vp);
2779 2795 VN_RELE(pre_tvp);
2780 2796 return (puterrno4(error));
2781 2797 }
2782 2798 different_export = 1;
2783 2799 } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2784 2800 /*
2785 2801 * The vfsp comparison is to handle the case where
2786 2802 * a LOFS mount is shared. lo_lookup traverses mount points,
2787 2803 * and NFS is unaware of local fs transitions because
2788 2804 * v_vfsmountedhere isn't set. For this special LOFS case,
2789 2805 * the dir and the obj returned by lookup will have different
2790 2806 * vfs ptrs.
2791 2807 */
2792 2808 different_export = 1;
2793 2809 }
2794 2810
2795 2811 if (different_export) {
2796 2812
2797 2813 bzero(&fid, sizeof (fid));
2798 2814 fid.fid_len = MAXFIDSZ;
2799 2815 error = vop_fid_pseudo(vp, &fid);
2800 2816 if (error) {
2801 2817 VN_RELE(vp);
2802 2818 if (pre_tvp)
2803 2819 VN_RELE(pre_tvp);
2804 2820 return (puterrno4(error));
2805 2821 }
2806 2822
2807 2823 if (dotdot)
2808 2824 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2809 2825 else
2810 2826 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2811 2827
2812 2828 if (exi == NULL) {
2813 2829 if (pre_tvp) {
2814 2830 /*
2815 2831 * If this vnode is a mounted-on vnode,
2816 2832 * but the mounted-on file system is not
2817 2833 * exported, send back the filehandle for
2818 2834 * the mounted-on vnode, not the root of
2819 2835 * the mounted-on file system.
2820 2836 */
2821 2837 VN_RELE(vp);
2822 2838 vp = pre_tvp;
2823 2839 exi = pre_exi;
2824 2840 } else {
2825 2841 VN_RELE(vp);
2826 2842 return (puterrno4(EACCES));
2827 2843 }
2828 2844 } else if (pre_tvp) {
2829 2845 /* we're done with pre_tvp now. release extra hold */
2830 2846 VN_RELE(pre_tvp);
2831 2847 }
2832 2848
2833 2849 cs->exi = exi;
2834 2850
2835 2851 /*
2836 2852 * Now we do a checkauth4. The reason is that
2837 2853 * this client/user may not have access to the new
2838 2854 * exported file system, and if they do,
2839 2855 * the client/user may be mapped to a different uid.
2840 2856 *
2841 2857 * We start with a new cr, because the checkauth4 done
2842 2858 * in the PUT*FH operation over wrote the cred's uid,
2843 2859 * gid, etc, and we want the real thing before calling
2844 2860 * checkauth4()
2845 2861 */
2846 2862 crfree(cs->cr);
2847 2863 cs->cr = crdup(cs->basecr);
2848 2864
2849 2865 oldvp = cs->vp;
2850 2866 cs->vp = vp;
2851 2867 is_newvp = TRUE;
2852 2868
2853 2869 stat = call_checkauth4(cs, req);
2854 2870 if (stat != NFS4_OK) {
2855 2871 VN_RELE(cs->vp);
2856 2872 cs->vp = oldvp;
2857 2873 return (stat);
2858 2874 }
2859 2875 }
2860 2876
2861 2877 /*
2862 2878 * After various NFS checks, do a label check on the path
2863 2879 * component. The label on this path should either be the
2864 2880 * global zone's label or a zone's label. We are only
2865 2881 * interested in the zone's label because exported files
2866 2882 * in global zone is accessible (though read-only) to
2867 2883 * clients. The exportability/visibility check is already
2868 2884 * done before reaching this code.
2869 2885 */
2870 2886 if (is_system_labeled()) {
2871 2887 bslabel_t *clabel;
2872 2888
2873 2889 ASSERT(req->rq_label != NULL);
2874 2890 clabel = req->rq_label;
2875 2891 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
2876 2892 "got client label from request(1)", struct svc_req *, req);
2877 2893
2878 2894 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2879 2895 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2880 2896 cs->exi)) {
2881 2897 error = EACCES;
2882 2898 goto err_out;
2883 2899 }
2884 2900 } else {
2885 2901 /*
2886 2902 * We grant access to admin_low label clients
2887 2903 * only if the client is trusted, i.e. also
2888 2904 * running Solaris Trusted Extension.
2889 2905 */
2890 2906 struct sockaddr *ca;
2891 2907 int addr_type;
2892 2908 void *ipaddr;
2893 2909 tsol_tpc_t *tp;
2894 2910
2895 2911 ca = (struct sockaddr *)svc_getrpccaller(
2896 2912 req->rq_xprt)->buf;
2897 2913 if (ca->sa_family == AF_INET) {
2898 2914 addr_type = IPV4_VERSION;
2899 2915 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
2900 2916 } else if (ca->sa_family == AF_INET6) {
2901 2917 addr_type = IPV6_VERSION;
2902 2918 ipaddr = &((struct sockaddr_in6 *)
2903 2919 ca)->sin6_addr;
2904 2920 }
2905 2921 tp = find_tpc(ipaddr, addr_type, B_FALSE);
2906 2922 if (tp == NULL || tp->tpc_tp.tp_doi !=
2907 2923 l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
2908 2924 SUN_CIPSO) {
2909 2925 if (tp != NULL)
2910 2926 TPC_RELE(tp);
2911 2927 error = EACCES;
2912 2928 goto err_out;
2913 2929 }
2914 2930 TPC_RELE(tp);
2915 2931 }
2916 2932 }
2917 2933
2918 2934 error = makefh4(&cs->fh, vp, cs->exi);
2919 2935
2920 2936 err_out:
2921 2937 if (error) {
2922 2938 if (is_newvp) {
2923 2939 VN_RELE(cs->vp);
2924 2940 cs->vp = oldvp;
2925 2941 } else
2926 2942 VN_RELE(vp);
2927 2943 return (puterrno4(error));
2928 2944 }
2929 2945
2930 2946 if (!is_newvp) {
2931 2947 if (cs->vp)
2932 2948 VN_RELE(cs->vp);
2933 2949 cs->vp = vp;
2934 2950 } else if (oldvp)
2935 2951 VN_RELE(oldvp);
2936 2952
2937 2953 /*
2938 2954 * if did lookup on attrdir and didn't lookup .., set named
2939 2955 * attr fh flag
2940 2956 */
2941 2957 if (attrdir && ! dotdot)
2942 2958 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2943 2959
2944 2960 /* Assume false for now, open proc will set this */
2945 2961 cs->mandlock = FALSE;
2946 2962
2947 2963 return (NFS4_OK);
2948 2964 }
2949 2965
2950 2966 /* ARGSUSED */
2951 2967 static void
2952 2968 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2953 2969 struct compound_state *cs)
2954 2970 {
2955 2971 LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2956 2972 LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2957 2973 char *nm;
2958 2974 uint_t len;
2959 2975 struct sockaddr *ca;
2960 2976 char *name = NULL;
2961 2977 nfsstat4 status;
2962 2978
2963 2979 DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2964 2980 LOOKUP4args *, args);
2965 2981
2966 2982 if (cs->vp == NULL) {
2967 2983 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2968 2984 goto out;
2969 2985 }
2970 2986
2971 2987 if (cs->vp->v_type == VLNK) {
2972 2988 *cs->statusp = resp->status = NFS4ERR_SYMLINK;
2973 2989 goto out;
2974 2990 }
2975 2991
2976 2992 if (cs->vp->v_type != VDIR) {
2977 2993 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2978 2994 goto out;
2979 2995 }
2980 2996
2981 2997 status = utf8_dir_verify(&args->objname);
2982 2998 if (status != NFS4_OK) {
2983 2999 *cs->statusp = resp->status = status;
2984 3000 goto out;
2985 3001 }
2986 3002
2987 3003 nm = utf8_to_str(&args->objname, &len, NULL);
2988 3004 if (nm == NULL) {
2989 3005 *cs->statusp = resp->status = NFS4ERR_INVAL;
2990 3006 goto out;
2991 3007 }
2992 3008
2993 3009 if (len > MAXNAMELEN) {
2994 3010 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2995 3011 kmem_free(nm, len);
2996 3012 goto out;
2997 3013 }
2998 3014
2999 3015 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3000 3016 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
3001 3017 MAXPATHLEN + 1);
3002 3018
3003 3019 if (name == NULL) {
3004 3020 *cs->statusp = resp->status = NFS4ERR_INVAL;
3005 3021 kmem_free(nm, len);
3006 3022 goto out;
3007 3023 }
3008 3024
3009 3025 *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
3010 3026
3011 3027 if (name != nm)
3012 3028 kmem_free(name, MAXPATHLEN + 1);
3013 3029 kmem_free(nm, len);
3014 3030
3015 3031 out:
3016 3032 DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
3017 3033 LOOKUP4res *, resp);
3018 3034 }
3019 3035
3020 3036 /* ARGSUSED */
3021 3037 static void
3022 3038 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3023 3039 struct compound_state *cs)
3024 3040 {
3025 3041 LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
3026 3042
3027 3043 DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
3028 3044
3029 3045 if (cs->vp == NULL) {
3030 3046 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3031 3047 goto out;
3032 3048 }
3033 3049
3034 3050 if (cs->vp->v_type != VDIR) {
3035 3051 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
3036 3052 goto out;
3037 3053 }
3038 3054
3039 3055 *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
3040 3056
3041 3057 /*
3042 3058 * From NFSV4 Specification, LOOKUPP should not check for
3043 3059 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
3044 3060 */
3045 3061 if (resp->status == NFS4ERR_WRONGSEC) {
3046 3062 *cs->statusp = resp->status = NFS4_OK;
3047 3063 }
3048 3064
3049 3065 out:
3050 3066 DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
3051 3067 LOOKUPP4res *, resp);
3052 3068 }
3053 3069
3054 3070
3055 3071 /*ARGSUSED2*/
3056 3072 static void
3057 3073 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3058 3074 struct compound_state *cs)
3059 3075 {
3060 3076 OPENATTR4args *args = &argop->nfs_argop4_u.opopenattr;
3061 3077 OPENATTR4res *resp = &resop->nfs_resop4_u.opopenattr;
3062 3078 vnode_t *avp = NULL;
3063 3079 int lookup_flags = LOOKUP_XATTR, error;
3064 3080 int exp_ro = 0;
3065 3081
3066 3082 DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3067 3083 OPENATTR4args *, args);
3068 3084
3069 3085 if (cs->vp == NULL) {
3070 3086 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3071 3087 goto out;
3072 3088 }
3073 3089
3074 3090 if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3075 3091 !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3076 3092 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3077 3093 goto out;
3078 3094 }
3079 3095
3080 3096 /*
3081 3097 * If file system supports passing ACE mask to VOP_ACCESS then
3082 3098 * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3083 3099 */
3084 3100
3085 3101 if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3086 3102 error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3087 3103 V_ACE_MASK, cs->cr, NULL);
3088 3104 else
3089 3105 error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3090 3106 (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3091 3107 (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3092 3108
3093 3109 if (error) {
3094 3110 *cs->statusp = resp->status = puterrno4(EACCES);
3095 3111 goto out;
3096 3112 }
3097 3113
3098 3114 /*
3099 3115 * The CREATE_XATTR_DIR VOP flag cannot be specified if
3100 3116 * the file system is exported read-only -- regardless of
3101 3117 * createdir flag. Otherwise the attrdir would be created
3102 3118 * (assuming server fs isn't mounted readonly locally). If
3103 3119 * VOP_LOOKUP returns ENOENT in this case, the error will
3104 3120 * be translated into EROFS. ENOSYS is mapped to ENOTSUP
3105 3121 * because specfs has no VOP_LOOKUP op, so the macro would
3106 3122 * return ENOSYS. EINVAL is returned by all (current)
3107 3123 * Solaris file system implementations when any of their
3108 3124 * restrictions are violated (xattr(dir) can't have xattrdir).
3109 3125 * Returning NOTSUPP is more appropriate in this case
3110 3126 * because the object will never be able to have an attrdir.
3111 3127 */
3112 3128 if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3113 3129 lookup_flags |= CREATE_XATTR_DIR;
3114 3130
3115 3131 error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3116 3132 NULL, NULL, NULL);
3117 3133
3118 3134 if (error) {
3119 3135 if (error == ENOENT && args->createdir && exp_ro)
3120 3136 *cs->statusp = resp->status = puterrno4(EROFS);
3121 3137 else if (error == EINVAL || error == ENOSYS)
3122 3138 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3123 3139 else
3124 3140 *cs->statusp = resp->status = puterrno4(error);
3125 3141 goto out;
3126 3142 }
3127 3143
3128 3144 ASSERT(avp->v_flag & V_XATTRDIR);
3129 3145
3130 3146 error = makefh4(&cs->fh, avp, cs->exi);
3131 3147
3132 3148 if (error) {
3133 3149 VN_RELE(avp);
3134 3150 *cs->statusp = resp->status = puterrno4(error);
3135 3151 goto out;
3136 3152 }
3137 3153
3138 3154 VN_RELE(cs->vp);
3139 3155 cs->vp = avp;
3140 3156
3141 3157 /*
3142 3158 * There is no requirement for an attrdir fh flag
3143 3159 * because the attrdir has a vnode flag to distinguish
3144 3160 * it from regular (non-xattr) directories. The
3145 3161 * FH4_ATTRDIR flag is set for future sanity checks.
3146 3162 */
3147 3163 set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3148 3164 *cs->statusp = resp->status = NFS4_OK;
3149 3165
3150 3166 out:
3151 3167 DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3152 3168 OPENATTR4res *, resp);
3153 3169 }
3154 3170
3155 3171 static int
3156 3172 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3157 3173 caller_context_t *ct)
3158 3174 {
3159 3175 int error;
3160 3176 int i;
3161 3177 clock_t delaytime;
3162 3178
3163 3179 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3164 3180
3165 3181 /*
3166 3182 * Don't block on mandatory locks. If this routine returns
3167 3183 * EAGAIN, the caller should return NFS4ERR_LOCKED.
3168 3184 */
3169 3185 uio->uio_fmode = FNONBLOCK;
3170 3186
3171 3187 for (i = 0; i < rfs4_maxlock_tries; i++) {
3172 3188
3173 3189
3174 3190 if (direction == FREAD) {
3175 3191 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3176 3192 error = VOP_READ(vp, uio, ioflag, cred, ct);
3177 3193 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3178 3194 } else {
3179 3195 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3180 3196 error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3181 3197 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3182 3198 }
3183 3199
3184 3200 if (error != EAGAIN)
3185 3201 break;
3186 3202
3187 3203 if (i < rfs4_maxlock_tries - 1) {
3188 3204 delay(delaytime);
3189 3205 delaytime *= 2;
3190 3206 }
3191 3207 }
3192 3208
3193 3209 return (error);
3194 3210 }
3195 3211
3196 3212 /* ARGSUSED */
3197 3213 static void
3198 3214 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3199 3215 struct compound_state *cs)
3200 3216 {
3201 3217 READ4args *args = &argop->nfs_argop4_u.opread;
3202 3218 READ4res *resp = &resop->nfs_resop4_u.opread;
3203 3219 int error;
3204 3220 int verror;
3205 3221 vnode_t *vp;
3206 3222 struct vattr va;
3207 3223 struct iovec iov, *iovp = NULL;
3208 3224 int iovcnt;
3209 3225 struct uio uio;
3210 3226 u_offset_t offset;
3211 3227 bool_t *deleg = &cs->deleg;
3212 3228 nfsstat4 stat;
3213 3229 int in_crit = 0;
3214 3230 mblk_t *mp = NULL;
3215 3231 int alloc_err = 0;
3216 3232 int rdma_used = 0;
3217 3233 int loaned_buffers;
3218 3234 caller_context_t ct;
3219 3235 struct uio *uiop;
3220 3236
3221 3237 DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3222 3238 READ4args, args);
3223 3239
3224 3240 vp = cs->vp;
3225 3241 if (vp == NULL) {
3226 3242 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3227 3243 goto out;
3228 3244 }
3229 3245 if (cs->access == CS_ACCESS_DENIED) {
3230 3246 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3231 3247 goto out;
3232 3248 }
3233 3249
3234 3250 if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3235 3251 deleg, TRUE, &ct)) != NFS4_OK) {
3236 3252 *cs->statusp = resp->status = stat;
3237 3253 goto out;
3238 3254 }
3239 3255
3240 3256 /*
3241 3257 * Enter the critical region before calling VOP_RWLOCK
3242 3258 * to avoid a deadlock with write requests.
3243 3259 */
3244 3260 if (nbl_need_check(vp)) {
3245 3261 nbl_start_crit(vp, RW_READER);
3246 3262 in_crit = 1;
3247 3263 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3248 3264 &ct)) {
3249 3265 *cs->statusp = resp->status = NFS4ERR_LOCKED;
3250 3266 goto out;
3251 3267 }
3252 3268 }
3253 3269
3254 3270 if (args->wlist) {
3255 3271 if (args->count > clist_len(args->wlist)) {
3256 3272 *cs->statusp = resp->status = NFS4ERR_INVAL;
3257 3273 goto out;
3258 3274 }
3259 3275 rdma_used = 1;
3260 3276 }
3261 3277
3262 3278 /* use loaned buffers for TCP */
3263 3279 loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3264 3280
3265 3281 va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3266 3282 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3267 3283
3268 3284 /*
3269 3285 * If we can't get the attributes, then we can't do the
3270 3286 * right access checking. So, we'll fail the request.
3271 3287 */
3272 3288 if (verror) {
3273 3289 *cs->statusp = resp->status = puterrno4(verror);
3274 3290 goto out;
3275 3291 }
3276 3292
3277 3293 if (vp->v_type != VREG) {
3278 3294 *cs->statusp = resp->status =
3279 3295 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3280 3296 goto out;
3281 3297 }
3282 3298
3283 3299 if (crgetuid(cs->cr) != va.va_uid &&
3284 3300 (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3285 3301 (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3286 3302 *cs->statusp = resp->status = puterrno4(error);
3287 3303 goto out;
3288 3304 }
3289 3305
3290 3306 if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3291 3307 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3292 3308 goto out;
3293 3309 }
3294 3310
3295 3311 offset = args->offset;
3296 3312 if (offset >= va.va_size) {
3297 3313 *cs->statusp = resp->status = NFS4_OK;
3298 3314 resp->eof = TRUE;
3299 3315 resp->data_len = 0;
3300 3316 resp->data_val = NULL;
3301 3317 resp->mblk = NULL;
3302 3318 /* RDMA */
3303 3319 resp->wlist = args->wlist;
3304 3320 resp->wlist_len = resp->data_len;
3305 3321 *cs->statusp = resp->status = NFS4_OK;
3306 3322 if (resp->wlist)
3307 3323 clist_zero_len(resp->wlist);
3308 3324 goto out;
3309 3325 }
3310 3326
3311 3327 if (args->count == 0) {
3312 3328 *cs->statusp = resp->status = NFS4_OK;
3313 3329 resp->eof = FALSE;
3314 3330 resp->data_len = 0;
3315 3331 resp->data_val = NULL;
3316 3332 resp->mblk = NULL;
3317 3333 /* RDMA */
3318 3334 resp->wlist = args->wlist;
3319 3335 resp->wlist_len = resp->data_len;
3320 3336 if (resp->wlist)
3321 3337 clist_zero_len(resp->wlist);
3322 3338 goto out;
3323 3339 }
3324 3340
3325 3341 /*
3326 3342 * Do not allocate memory more than maximum allowed
3327 3343 * transfer size
3328 3344 */
3329 3345 if (args->count > rfs4_tsize(req))
3330 3346 args->count = rfs4_tsize(req);
3331 3347
3332 3348 if (loaned_buffers) {
3333 3349 uiop = (uio_t *)rfs_setup_xuio(vp);
3334 3350 ASSERT(uiop != NULL);
3335 3351 uiop->uio_segflg = UIO_SYSSPACE;
3336 3352 uiop->uio_loffset = args->offset;
3337 3353 uiop->uio_resid = args->count;
3338 3354
3339 3355 /* Jump to do the read if successful */
3340 3356 if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3341 3357 /*
3342 3358 * Need to hold the vnode until after VOP_RETZCBUF()
3343 3359 * is called.
3344 3360 */
3345 3361 VN_HOLD(vp);
3346 3362 goto doio_read;
3347 3363 }
3348 3364
3349 3365 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3350 3366 uiop->uio_loffset, int, uiop->uio_resid);
3351 3367
3352 3368 uiop->uio_extflg = 0;
3353 3369
3354 3370 /* failure to setup for zero copy */
3355 3371 rfs_free_xuio((void *)uiop);
3356 3372 loaned_buffers = 0;
3357 3373 }
3358 3374
3359 3375 /*
3360 3376 * If returning data via RDMA Write, then grab the chunk list. If we
3361 3377 * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3362 3378 */
3363 3379 if (rdma_used) {
3364 3380 mp = NULL;
3365 3381 (void) rdma_get_wchunk(req, &iov, args->wlist);
3366 3382 uio.uio_iov = &iov;
3367 3383 uio.uio_iovcnt = 1;
3368 3384 } else {
3369 3385 /*
3370 3386 * mp will contain the data to be sent out in the read reply.
3371 3387 * It will be freed after the reply has been sent.
3372 3388 */
3373 3389 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
3374 3390 ASSERT(mp != NULL);
3375 3391 ASSERT(alloc_err == 0);
3376 3392 uio.uio_iov = iovp;
3377 3393 uio.uio_iovcnt = iovcnt;
3378 3394 }
3379 3395
3380 3396 uio.uio_segflg = UIO_SYSSPACE;
3381 3397 uio.uio_extflg = UIO_COPY_CACHED;
3382 3398 uio.uio_loffset = args->offset;
3383 3399 uio.uio_resid = args->count;
3384 3400 uiop = &uio;
3385 3401
3386 3402 doio_read:
3387 3403 error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3388 3404
3389 3405 va.va_mask = AT_SIZE;
3390 3406 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3391 3407
3392 3408 if (error) {
3393 3409 if (mp)
3394 3410 freemsg(mp);
3395 3411 *cs->statusp = resp->status = puterrno4(error);
3396 3412 goto out;
3397 3413 }
3398 3414
3399 3415 /* make mblk using zc buffers */
3400 3416 if (loaned_buffers) {
3401 3417 mp = uio_to_mblk(uiop);
3402 3418 ASSERT(mp != NULL);
3403 3419 }
3404 3420
3405 3421 *cs->statusp = resp->status = NFS4_OK;
3406 3422
3407 3423 ASSERT(uiop->uio_resid >= 0);
3408 3424 resp->data_len = args->count - uiop->uio_resid;
3409 3425 if (mp) {
3410 3426 resp->data_val = (char *)mp->b_datap->db_base;
3411 3427 rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3412 3428 } else {
3413 3429 resp->data_val = (caddr_t)iov.iov_base;
3414 3430 }
3415 3431
3416 3432 resp->mblk = mp;
3417 3433
3418 3434 if (!verror && offset + resp->data_len == va.va_size)
3419 3435 resp->eof = TRUE;
3420 3436 else
3421 3437 resp->eof = FALSE;
3422 3438
3423 3439 if (rdma_used) {
3424 3440 if (!rdma_setup_read_data4(args, resp)) {
3425 3441 *cs->statusp = resp->status = NFS4ERR_INVAL;
3426 3442 }
3427 3443 } else {
3428 3444 resp->wlist = NULL;
3429 3445 }
3430 3446
3431 3447 out:
3432 3448 if (in_crit)
3433 3449 nbl_end_crit(vp);
3434 3450
3435 3451 if (iovp != NULL)
3436 3452 kmem_free(iovp, iovcnt * sizeof (struct iovec));
3437 3453
3438 3454 DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3439 3455 READ4res *, resp);
3440 3456 }
3441 3457
3442 3458 static void
3443 3459 rfs4_op_read_free(nfs_resop4 *resop)
3444 3460 {
3445 3461 READ4res *resp = &resop->nfs_resop4_u.opread;
3446 3462
3447 3463 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3448 3464 freemsg(resp->mblk);
3449 3465 resp->mblk = NULL;
3450 3466 resp->data_val = NULL;
3451 3467 resp->data_len = 0;
3452 3468 }
3453 3469 }
3454 3470
3455 3471 static void
3456 3472 rfs4_op_readdir_free(nfs_resop4 * resop)
3457 3473 {
3458 3474 READDIR4res *resp = &resop->nfs_resop4_u.opreaddir;
3459 3475
3460 3476 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3461 3477 freeb(resp->mblk);
3462 3478 resp->mblk = NULL;
3463 3479 resp->data_len = 0;
3464 3480 }
3465 3481 }
3466 3482
3467 3483
3468 3484 /* ARGSUSED */
3469 3485 static void
3470 3486 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3471 3487 struct compound_state *cs)
3472 3488 {
3473 3489 PUTPUBFH4res *resp = &resop->nfs_resop4_u.opputpubfh;
3474 3490 int error;
3475 3491 vnode_t *vp;
3476 3492 struct exportinfo *exi, *sav_exi;
3477 3493 nfs_fh4_fmt_t *fh_fmtp;
3478 3494 nfs_export_t *ne = nfs_get_export();
3479 3495
3480 3496 DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3481 3497
3482 3498 if (cs->vp) {
3483 3499 VN_RELE(cs->vp);
3484 3500 cs->vp = NULL;
3485 3501 }
3486 3502
3487 3503 if (cs->cr)
3488 3504 crfree(cs->cr);
3489 3505
3490 3506 cs->cr = crdup(cs->basecr);
3491 3507
3492 3508 vp = ne->exi_public->exi_vp;
3493 3509 if (vp == NULL) {
3494 3510 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3495 3511 goto out;
3496 3512 }
3497 3513
3498 3514 error = makefh4(&cs->fh, vp, ne->exi_public);
3499 3515 if (error != 0) {
3500 3516 *cs->statusp = resp->status = puterrno4(error);
3501 3517 goto out;
3502 3518 }
3503 3519 sav_exi = cs->exi;
3504 3520 if (ne->exi_public == ne->exi_root) {
3505 3521 /*
3506 3522 * No filesystem is actually shared public, so we default
3507 3523 * to exi_root. In this case, we must check whether root
3508 3524 * is exported.
3509 3525 */
3510 3526 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3511 3527
3512 3528 /*
3513 3529 * if root filesystem is exported, the exportinfo struct that we
3514 3530 * should use is what checkexport4 returns, because root_exi is
3515 3531 * actually a mostly empty struct.
3516 3532 */
3517 3533 exi = checkexport4(&fh_fmtp->fh4_fsid,
3518 3534 (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3519 3535 cs->exi = ((exi != NULL) ? exi : ne->exi_public);
3520 3536 } else {
3521 3537 /*
3522 3538 * it's a properly shared filesystem
3523 3539 */
3524 3540 cs->exi = ne->exi_public;
3525 3541 }
3526 3542
3527 3543 if (is_system_labeled()) {
3528 3544 bslabel_t *clabel;
3529 3545
3530 3546 ASSERT(req->rq_label != NULL);
3531 3547 clabel = req->rq_label;
3532 3548 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3533 3549 "got client label from request(1)",
3534 3550 struct svc_req *, req);
3535 3551 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3536 3552 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3537 3553 cs->exi)) {
3538 3554 *cs->statusp = resp->status =
3539 3555 NFS4ERR_SERVERFAULT;
3540 3556 goto out;
3541 3557 }
3542 3558 }
3543 3559 }
3544 3560
3545 3561 VN_HOLD(vp);
3546 3562 cs->vp = vp;
3547 3563
3548 3564 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3549 3565 VN_RELE(cs->vp);
3550 3566 cs->vp = NULL;
3551 3567 cs->exi = sav_exi;
3552 3568 goto out;
3553 3569 }
3554 3570
3555 3571 *cs->statusp = resp->status = NFS4_OK;
3556 3572 out:
3557 3573 DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3558 3574 PUTPUBFH4res *, resp);
3559 3575 }
3560 3576
3561 3577 /*
3562 3578 * XXX - issue with put*fh operations. Suppose /export/home is exported.
3563 3579 * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3564 3580 * or joe have restrictive search permissions, then we shouldn't let
3565 3581 * the client get a file handle. This is easy to enforce. However, we
3566 3582 * don't know what security flavor should be used until we resolve the
3567 3583 * path name. Another complication is uid mapping. If root is
3568 3584 * the user, then it will be mapped to the anonymous user by default,
3569 3585 * but we won't know that till we've resolved the path name. And we won't
3570 3586 * know what the anonymous user is.
3571 3587 * Luckily, SECINFO is specified to take a full filename.
3572 3588 * So what we will have to in rfs4_op_lookup is check that flavor of
3573 3589 * the target object matches that of the request, and if root was the
3574 3590 * caller, check for the root= and anon= options, and if necessary,
3575 3591 * repeat the lookup using the right cred_t. But that's not done yet.
3576 3592 */
3577 3593 /* ARGSUSED */
3578 3594 static void
3579 3595 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3580 3596 struct compound_state *cs)
3581 3597 {
3582 3598 PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3583 3599 PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3584 3600 nfs_fh4_fmt_t *fh_fmtp;
3585 3601
3586 3602 DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3587 3603 PUTFH4args *, args);
3588 3604
3589 3605 if (cs->vp) {
3590 3606 VN_RELE(cs->vp);
3591 3607 cs->vp = NULL;
3592 3608 }
3593 3609
3594 3610 if (cs->cr) {
3595 3611 crfree(cs->cr);
3596 3612 cs->cr = NULL;
3597 3613 }
3598 3614
3599 3615
3600 3616 if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3601 3617 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3602 3618 goto out;
3603 3619 }
3604 3620
3605 3621 fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3606 3622 cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3607 3623 NULL);
3608 3624
3609 3625 if (cs->exi == NULL) {
3610 3626 *cs->statusp = resp->status = NFS4ERR_STALE;
3611 3627 goto out;
3612 3628 }
3613 3629
3614 3630 cs->cr = crdup(cs->basecr);
3615 3631
3616 3632 ASSERT(cs->cr != NULL);
3617 3633
3618 3634 if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3619 3635 *cs->statusp = resp->status;
3620 3636 goto out;
3621 3637 }
3622 3638
3623 3639 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3624 3640 VN_RELE(cs->vp);
3625 3641 cs->vp = NULL;
3626 3642 goto out;
3627 3643 }
3628 3644
3629 3645 nfs_fh4_copy(&args->object, &cs->fh);
3630 3646 *cs->statusp = resp->status = NFS4_OK;
3631 3647 cs->deleg = FALSE;
3632 3648
3633 3649 out:
3634 3650 DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3635 3651 PUTFH4res *, resp);
3636 3652 }
3637 3653
3638 3654 /* ARGSUSED */
3639 3655 static void
3640 3656 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3641 3657 struct compound_state *cs)
3642 3658 {
3643 3659 PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3644 3660 int error;
3645 3661 fid_t fid;
3646 3662 struct exportinfo *exi, *sav_exi;
3647 3663
3648 3664 DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3649 3665
3650 3666 if (cs->vp) {
3651 3667 VN_RELE(cs->vp);
3652 3668 cs->vp = NULL;
3653 3669 }
3654 3670
3655 3671 if (cs->cr)
3656 3672 crfree(cs->cr);
3657 3673
3658 3674 cs->cr = crdup(cs->basecr);
3659 3675
3660 3676 /*
3661 3677 * Using rootdir, the system root vnode,
3662 3678 * get its fid.
3663 3679 */
3664 3680 bzero(&fid, sizeof (fid));
3665 3681 fid.fid_len = MAXFIDSZ;
3666 3682 error = vop_fid_pseudo(ZONE_ROOTVP(), &fid);
3667 3683 if (error != 0) {
3668 3684 *cs->statusp = resp->status = puterrno4(error);
3669 3685 goto out;
3670 3686 }
3671 3687
3672 3688 /*
3673 3689 * Then use the root fsid & fid it to find out if it's exported
3674 3690 *
3675 3691 * If the server root isn't exported directly, then
3676 3692 * it should at least be a pseudo export based on
3677 3693 * one or more exports further down in the server's
3678 3694 * file tree.
3679 3695 */
3680 3696 exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL);
3681 3697 if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3682 3698 NFS4_DEBUG(rfs4_debug,
3683 3699 (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3684 3700 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3685 3701 goto out;
3686 3702 }
3687 3703
3688 3704 /*
3689 3705 * Now make a filehandle based on the root
3690 3706 * export and root vnode.
3691 3707 */
3692 3708 error = makefh4(&cs->fh, ZONE_ROOTVP(), exi);
3693 3709 if (error != 0) {
3694 3710 *cs->statusp = resp->status = puterrno4(error);
3695 3711 goto out;
3696 3712 }
3697 3713
3698 3714 sav_exi = cs->exi;
3699 3715 cs->exi = exi;
3700 3716
3701 3717 VN_HOLD(ZONE_ROOTVP());
3702 3718 cs->vp = ZONE_ROOTVP();
3703 3719
3704 3720 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3705 3721 VN_RELE(cs->vp);
3706 3722 cs->vp = NULL;
3707 3723 cs->exi = sav_exi;
3708 3724 goto out;
3709 3725 }
3710 3726
3711 3727 *cs->statusp = resp->status = NFS4_OK;
3712 3728 cs->deleg = FALSE;
3713 3729 out:
3714 3730 DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3715 3731 PUTROOTFH4res *, resp);
3716 3732 }
3717 3733
3718 3734 /*
3719 3735 * readlink: args: CURRENT_FH.
3720 3736 * res: status. If success - CURRENT_FH unchanged, return linktext.
3721 3737 */
3722 3738
3723 3739 /* ARGSUSED */
3724 3740 static void
3725 3741 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3726 3742 struct compound_state *cs)
3727 3743 {
3728 3744 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3729 3745 int error;
3730 3746 vnode_t *vp;
3731 3747 struct iovec iov;
3732 3748 struct vattr va;
3733 3749 struct uio uio;
3734 3750 char *data;
3735 3751 struct sockaddr *ca;
3736 3752 char *name = NULL;
3737 3753 int is_referral;
3738 3754
3739 3755 DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3740 3756
3741 3757 /* CURRENT_FH: directory */
3742 3758 vp = cs->vp;
3743 3759 if (vp == NULL) {
3744 3760 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3745 3761 goto out;
3746 3762 }
3747 3763
3748 3764 if (cs->access == CS_ACCESS_DENIED) {
3749 3765 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3750 3766 goto out;
3751 3767 }
3752 3768
3753 3769 /* Is it a referral? */
3754 3770 if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3755 3771
3756 3772 is_referral = 1;
3757 3773
3758 3774 } else {
3759 3775
3760 3776 is_referral = 0;
3761 3777
3762 3778 if (vp->v_type == VDIR) {
3763 3779 *cs->statusp = resp->status = NFS4ERR_ISDIR;
3764 3780 goto out;
3765 3781 }
3766 3782
3767 3783 if (vp->v_type != VLNK) {
3768 3784 *cs->statusp = resp->status = NFS4ERR_INVAL;
3769 3785 goto out;
3770 3786 }
3771 3787
3772 3788 }
3773 3789
3774 3790 va.va_mask = AT_MODE;
3775 3791 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3776 3792 if (error) {
3777 3793 *cs->statusp = resp->status = puterrno4(error);
3778 3794 goto out;
3779 3795 }
3780 3796
3781 3797 if (MANDLOCK(vp, va.va_mode)) {
3782 3798 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3783 3799 goto out;
3784 3800 }
3785 3801
3786 3802 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3787 3803
3788 3804 if (is_referral) {
3789 3805 char *s;
3790 3806 size_t strsz;
3791 3807
3792 3808 /* Get an artificial symlink based on a referral */
3793 3809 s = build_symlink(vp, cs->cr, &strsz);
3794 3810 global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3795 3811 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3796 3812 vnode_t *, vp, char *, s);
3797 3813 if (s == NULL)
3798 3814 error = EINVAL;
3799 3815 else {
3800 3816 error = 0;
3801 3817 (void) strlcpy(data, s, MAXPATHLEN + 1);
3802 3818 kmem_free(s, strsz);
3803 3819 }
3804 3820
3805 3821 } else {
3806 3822
3807 3823 iov.iov_base = data;
3808 3824 iov.iov_len = MAXPATHLEN;
3809 3825 uio.uio_iov = &iov;
3810 3826 uio.uio_iovcnt = 1;
3811 3827 uio.uio_segflg = UIO_SYSSPACE;
3812 3828 uio.uio_extflg = UIO_COPY_CACHED;
3813 3829 uio.uio_loffset = 0;
3814 3830 uio.uio_resid = MAXPATHLEN;
3815 3831
3816 3832 error = VOP_READLINK(vp, &uio, cs->cr, NULL);
3817 3833
3818 3834 if (!error)
3819 3835 *(data + MAXPATHLEN - uio.uio_resid) = '\0';
3820 3836 }
3821 3837
3822 3838 if (error) {
3823 3839 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3824 3840 *cs->statusp = resp->status = puterrno4(error);
3825 3841 goto out;
3826 3842 }
3827 3843
3828 3844 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3829 3845 name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3830 3846 MAXPATHLEN + 1);
3831 3847
3832 3848 if (name == NULL) {
3833 3849 /*
3834 3850 * Even though the conversion failed, we return
3835 3851 * something. We just don't translate it.
3836 3852 */
3837 3853 name = data;
3838 3854 }
3839 3855
3840 3856 /*
3841 3857 * treat link name as data
3842 3858 */
3843 3859 (void) str_to_utf8(name, (utf8string *)&resp->link);
3844 3860
3845 3861 if (name != data)
3846 3862 kmem_free(name, MAXPATHLEN + 1);
3847 3863 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3848 3864 *cs->statusp = resp->status = NFS4_OK;
3849 3865
3850 3866 out:
3851 3867 DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3852 3868 READLINK4res *, resp);
3853 3869 }
3854 3870
3855 3871 static void
3856 3872 rfs4_op_readlink_free(nfs_resop4 *resop)
3857 3873 {
3858 3874 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3859 3875 utf8string *symlink = (utf8string *)&resp->link;
3860 3876
3861 3877 if (symlink->utf8string_val) {
3862 3878 UTF8STRING_FREE(*symlink)
3863 3879 }
3864 3880 }
3865 3881
3866 3882 /*
3867 3883 * release_lockowner:
3868 3884 * Release any state associated with the supplied
3869 3885 * lockowner. Note if any lo_state is holding locks we will not
3870 3886 * rele that lo_state and thus the lockowner will not be destroyed.
3871 3887 * A client using lock after the lock owner stateid has been released
3872 3888 * will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3873 3889 * to reissue the lock with new_lock_owner set to TRUE.
3874 3890 * args: lock_owner
3875 3891 * res: status
3876 3892 */
3877 3893 /* ARGSUSED */
3878 3894 static void
3879 3895 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3880 3896 struct svc_req *req, struct compound_state *cs)
3881 3897 {
3882 3898 RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3883 3899 RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3884 3900 rfs4_lockowner_t *lo;
3885 3901 rfs4_openowner_t *oo;
3886 3902 rfs4_state_t *sp;
3887 3903 rfs4_lo_state_t *lsp;
3888 3904 rfs4_client_t *cp;
3889 3905 bool_t create = FALSE;
3890 3906 locklist_t *llist;
3891 3907 sysid_t sysid;
3892 3908
3893 3909 DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3894 3910 cs, RELEASE_LOCKOWNER4args *, ap);
3895 3911
3896 3912 /* Make sure there is a clientid around for this request */
3897 3913 cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3898 3914
3899 3915 if (cp == NULL) {
3900 3916 *cs->statusp = resp->status =
3901 3917 rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3902 3918 goto out;
3903 3919 }
3904 3920 rfs4_client_rele(cp);
3905 3921
3906 3922 lo = rfs4_findlockowner(&ap->lock_owner, &create);
3907 3923 if (lo == NULL) {
3908 3924 *cs->statusp = resp->status = NFS4_OK;
3909 3925 goto out;
3910 3926 }
3911 3927 ASSERT(lo->rl_client != NULL);
3912 3928
3913 3929 /*
3914 3930 * Check for EXPIRED client. If so will reap state with in a lease
3915 3931 * period or on next set_clientid_confirm step
3916 3932 */
3917 3933 if (rfs4_lease_expired(lo->rl_client)) {
3918 3934 rfs4_lockowner_rele(lo);
3919 3935 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
3920 3936 goto out;
3921 3937 }
3922 3938
3923 3939 /*
3924 3940 * If no sysid has been assigned, then no locks exist; just return.
3925 3941 */
3926 3942 rfs4_dbe_lock(lo->rl_client->rc_dbe);
3927 3943 if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
3928 3944 rfs4_lockowner_rele(lo);
3929 3945 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3930 3946 goto out;
3931 3947 }
3932 3948
3933 3949 sysid = lo->rl_client->rc_sysidt;
3934 3950 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3935 3951
3936 3952 /*
3937 3953 * Mark the lockowner invalid.
3938 3954 */
3939 3955 rfs4_dbe_hide(lo->rl_dbe);
3940 3956
3941 3957 /*
3942 3958 * sysid-pid pair should now not be used since the lockowner is
3943 3959 * invalid. If the client were to instantiate the lockowner again
3944 3960 * it would be assigned a new pid. Thus we can get the list of
3945 3961 * current locks.
3946 3962 */
3947 3963
3948 3964 llist = flk_get_active_locks(sysid, lo->rl_pid);
3949 3965 /* If we are still holding locks fail */
3950 3966 if (llist != NULL) {
3951 3967
3952 3968 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3953 3969
3954 3970 flk_free_locklist(llist);
3955 3971 /*
3956 3972 * We need to unhide the lockowner so the client can
3957 3973 * try it again. The bad thing here is if the client
3958 3974 * has a logic error that took it here in the first place
3959 3975 * they probably have lost accounting of the locks that it
3960 3976 * is holding. So we may have dangling state until the
3961 3977 * open owner state is reaped via close. One scenario
3962 3978 * that could possibly occur is that the client has
3963 3979 * sent the unlock request(s) in separate threads
3964 3980 * and has not waited for the replies before sending the
3965 3981 * RELEASE_LOCKOWNER request. Presumably, it would expect
3966 3982 * and deal appropriately with NFS4ERR_LOCKS_HELD, by
3967 3983 * reissuing the request.
3968 3984 */
3969 3985 rfs4_dbe_unhide(lo->rl_dbe);
3970 3986 rfs4_lockowner_rele(lo);
3971 3987 goto out;
3972 3988 }
3973 3989
3974 3990 /*
3975 3991 * For the corresponding client we need to check each open
3976 3992 * owner for any opens that have lockowner state associated
3977 3993 * with this lockowner.
3978 3994 */
3979 3995
3980 3996 rfs4_dbe_lock(lo->rl_client->rc_dbe);
3981 3997 for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
3982 3998 oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
3983 3999
3984 4000 rfs4_dbe_lock(oo->ro_dbe);
3985 4001 for (sp = list_head(&oo->ro_statelist); sp != NULL;
3986 4002 sp = list_next(&oo->ro_statelist, sp)) {
3987 4003
3988 4004 rfs4_dbe_lock(sp->rs_dbe);
3989 4005 for (lsp = list_head(&sp->rs_lostatelist);
3990 4006 lsp != NULL;
3991 4007 lsp = list_next(&sp->rs_lostatelist, lsp)) {
3992 4008 if (lsp->rls_locker == lo) {
3993 4009 rfs4_dbe_lock(lsp->rls_dbe);
3994 4010 rfs4_dbe_invalidate(lsp->rls_dbe);
3995 4011 rfs4_dbe_unlock(lsp->rls_dbe);
3996 4012 }
3997 4013 }
3998 4014 rfs4_dbe_unlock(sp->rs_dbe);
3999 4015 }
4000 4016 rfs4_dbe_unlock(oo->ro_dbe);
4001 4017 }
4002 4018 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4003 4019
4004 4020 rfs4_lockowner_rele(lo);
4005 4021
4006 4022 *cs->statusp = resp->status = NFS4_OK;
4007 4023
4008 4024 out:
4009 4025 DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4010 4026 cs, RELEASE_LOCKOWNER4res *, resp);
4011 4027 }
4012 4028
4013 4029 /*
4014 4030 * short utility function to lookup a file and recall the delegation
4015 4031 */
4016 4032 static rfs4_file_t *
4017 4033 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4018 4034 int *lkup_error, cred_t *cr)
4019 4035 {
4020 4036 vnode_t *vp;
4021 4037 rfs4_file_t *fp = NULL;
4022 4038 bool_t fcreate = FALSE;
4023 4039 int error;
4024 4040
4025 4041 if (vpp)
4026 4042 *vpp = NULL;
4027 4043
4028 4044 if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4029 4045 NULL)) == 0) {
4030 4046 if (vp->v_type == VREG)
4031 4047 fp = rfs4_findfile(vp, NULL, &fcreate);
4032 4048 if (vpp)
4033 4049 *vpp = vp;
4034 4050 else
4035 4051 VN_RELE(vp);
4036 4052 }
4037 4053
4038 4054 if (lkup_error)
4039 4055 *lkup_error = error;
4040 4056
4041 4057 return (fp);
4042 4058 }
4043 4059
4044 4060 /*
4045 4061 * remove: args: CURRENT_FH: directory; name.
4046 4062 * res: status. If success - CURRENT_FH unchanged, return change_info
4047 4063 * for directory.
4048 4064 */
4049 4065 /* ARGSUSED */
4050 4066 static void
4051 4067 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4052 4068 struct compound_state *cs)
4053 4069 {
4054 4070 REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4055 4071 REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4056 4072 int error;
4057 4073 vnode_t *dvp, *vp;
4058 4074 struct vattr bdva, idva, adva;
4059 4075 char *nm;
4060 4076 uint_t len;
4061 4077 rfs4_file_t *fp;
4062 4078 int in_crit = 0;
4063 4079 bslabel_t *clabel;
4064 4080 struct sockaddr *ca;
4065 4081 char *name = NULL;
4066 4082 nfsstat4 status;
4067 4083
4068 4084 DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4069 4085 REMOVE4args *, args);
4070 4086
4071 4087 /* CURRENT_FH: directory */
4072 4088 dvp = cs->vp;
4073 4089 if (dvp == NULL) {
4074 4090 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4075 4091 goto out;
4076 4092 }
4077 4093
4078 4094 if (cs->access == CS_ACCESS_DENIED) {
4079 4095 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4080 4096 goto out;
4081 4097 }
4082 4098
4083 4099 /*
4084 4100 * If there is an unshared filesystem mounted on this vnode,
4085 4101 * Do not allow to remove anything in this directory.
4086 4102 */
4087 4103 if (vn_ismntpt(dvp)) {
4088 4104 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4089 4105 goto out;
4090 4106 }
4091 4107
4092 4108 if (dvp->v_type != VDIR) {
4093 4109 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4094 4110 goto out;
4095 4111 }
4096 4112
4097 4113 status = utf8_dir_verify(&args->target);
4098 4114 if (status != NFS4_OK) {
4099 4115 *cs->statusp = resp->status = status;
4100 4116 goto out;
4101 4117 }
4102 4118
4103 4119 /*
4104 4120 * Lookup the file so that we can check if it's a directory
4105 4121 */
4106 4122 nm = utf8_to_fn(&args->target, &len, NULL);
4107 4123 if (nm == NULL) {
4108 4124 *cs->statusp = resp->status = NFS4ERR_INVAL;
4109 4125 goto out;
4110 4126 }
4111 4127
4112 4128 if (len > MAXNAMELEN) {
4113 4129 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4114 4130 kmem_free(nm, len);
4115 4131 goto out;
4116 4132 }
4117 4133
4118 4134 if (rdonly4(req, cs)) {
4119 4135 *cs->statusp = resp->status = NFS4ERR_ROFS;
4120 4136 kmem_free(nm, len);
4121 4137 goto out;
4122 4138 }
4123 4139
4124 4140 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4125 4141 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4126 4142 MAXPATHLEN + 1);
4127 4143
4128 4144 if (name == NULL) {
4129 4145 *cs->statusp = resp->status = NFS4ERR_INVAL;
4130 4146 kmem_free(nm, len);
4131 4147 goto out;
4132 4148 }
4133 4149
4134 4150 /*
4135 4151 * Lookup the file to determine type and while we are see if
4136 4152 * there is a file struct around and check for delegation.
4137 4153 * We don't need to acquire va_seq before this lookup, if
4138 4154 * it causes an update, cinfo.before will not match, which will
4139 4155 * trigger a cache flush even if atomic is TRUE.
4140 4156 */
4141 4157 if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4142 4158 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4143 4159 NULL)) {
4144 4160 VN_RELE(vp);
4145 4161 rfs4_file_rele(fp);
4146 4162 *cs->statusp = resp->status = NFS4ERR_DELAY;
4147 4163 if (nm != name)
4148 4164 kmem_free(name, MAXPATHLEN + 1);
4149 4165 kmem_free(nm, len);
4150 4166 goto out;
4151 4167 }
4152 4168 }
4153 4169
4154 4170 /* Didn't find anything to remove */
4155 4171 if (vp == NULL) {
4156 4172 *cs->statusp = resp->status = error;
4157 4173 if (nm != name)
4158 4174 kmem_free(name, MAXPATHLEN + 1);
4159 4175 kmem_free(nm, len);
4160 4176 goto out;
4161 4177 }
4162 4178
4163 4179 if (nbl_need_check(vp)) {
4164 4180 nbl_start_crit(vp, RW_READER);
4165 4181 in_crit = 1;
4166 4182 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4167 4183 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4168 4184 if (nm != name)
4169 4185 kmem_free(name, MAXPATHLEN + 1);
4170 4186 kmem_free(nm, len);
4171 4187 nbl_end_crit(vp);
4172 4188 VN_RELE(vp);
4173 4189 if (fp) {
4174 4190 rfs4_clear_dont_grant(fp);
4175 4191 rfs4_file_rele(fp);
4176 4192 }
4177 4193 goto out;
4178 4194 }
4179 4195 }
4180 4196
4181 4197 /* check label before allowing removal */
4182 4198 if (is_system_labeled()) {
4183 4199 ASSERT(req->rq_label != NULL);
4184 4200 clabel = req->rq_label;
4185 4201 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4186 4202 "got client label from request(1)",
4187 4203 struct svc_req *, req);
4188 4204 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4189 4205 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4190 4206 cs->exi)) {
4191 4207 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4192 4208 if (name != nm)
4193 4209 kmem_free(name, MAXPATHLEN + 1);
4194 4210 kmem_free(nm, len);
4195 4211 if (in_crit)
4196 4212 nbl_end_crit(vp);
4197 4213 VN_RELE(vp);
4198 4214 if (fp) {
4199 4215 rfs4_clear_dont_grant(fp);
4200 4216 rfs4_file_rele(fp);
4201 4217 }
4202 4218 goto out;
4203 4219 }
4204 4220 }
4205 4221 }
4206 4222
4207 4223 /* Get dir "before" change value */
4208 4224 bdva.va_mask = AT_CTIME|AT_SEQ;
4209 4225 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4210 4226 if (error) {
4211 4227 *cs->statusp = resp->status = puterrno4(error);
4212 4228 if (nm != name)
4213 4229 kmem_free(name, MAXPATHLEN + 1);
4214 4230 kmem_free(nm, len);
4215 4231 if (in_crit)
4216 4232 nbl_end_crit(vp);
4217 4233 VN_RELE(vp);
4218 4234 if (fp) {
4219 4235 rfs4_clear_dont_grant(fp);
4220 4236 rfs4_file_rele(fp);
4221 4237 }
4222 4238 goto out;
4223 4239 }
4224 4240 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4225 4241
4226 4242 /* Actually do the REMOVE operation */
4227 4243 if (vp->v_type == VDIR) {
4228 4244 /*
4229 4245 * Can't remove a directory that has a mounted-on filesystem.
4230 4246 */
4231 4247 if (vn_ismntpt(vp)) {
4232 4248 error = EACCES;
4233 4249 } else {
4234 4250 /*
4235 4251 * System V defines rmdir to return EEXIST,
4236 4252 * not ENOTEMPTY, if the directory is not
4237 4253 * empty. A System V NFS server needs to map
4238 4254 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4239 4255 * transmit over the wire.
4240 4256 */
4241 4257 if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr,
4242 4258 NULL, 0)) == EEXIST)
4243 4259 error = ENOTEMPTY;
4244 4260 }
4245 4261 } else {
4246 4262 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4247 4263 fp != NULL) {
4248 4264 struct vattr va;
4249 4265 vnode_t *tvp;
4250 4266
4251 4267 rfs4_dbe_lock(fp->rf_dbe);
4252 4268 tvp = fp->rf_vp;
4253 4269 if (tvp)
4254 4270 VN_HOLD(tvp);
4255 4271 rfs4_dbe_unlock(fp->rf_dbe);
4256 4272
4257 4273 if (tvp) {
4258 4274 /*
4259 4275 * This is va_seq safe because we are not
4260 4276 * manipulating dvp.
4261 4277 */
4262 4278 va.va_mask = AT_NLINK;
4263 4279 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4264 4280 va.va_nlink == 0) {
4265 4281 /* Remove state on file remove */
4266 4282 if (in_crit) {
4267 4283 nbl_end_crit(vp);
4268 4284 in_crit = 0;
4269 4285 }
4270 4286 rfs4_close_all_state(fp);
4271 4287 }
4272 4288 VN_RELE(tvp);
4273 4289 }
4274 4290 }
4275 4291 }
4276 4292
4277 4293 if (in_crit)
4278 4294 nbl_end_crit(vp);
4279 4295 VN_RELE(vp);
4280 4296
4281 4297 if (fp) {
4282 4298 rfs4_clear_dont_grant(fp);
4283 4299 rfs4_file_rele(fp);
4284 4300 }
4285 4301 if (nm != name)
4286 4302 kmem_free(name, MAXPATHLEN + 1);
4287 4303 kmem_free(nm, len);
4288 4304
4289 4305 if (error) {
4290 4306 *cs->statusp = resp->status = puterrno4(error);
4291 4307 goto out;
4292 4308 }
4293 4309
4294 4310 /*
4295 4311 * Get the initial "after" sequence number, if it fails, set to zero
4296 4312 */
4297 4313 idva.va_mask = AT_SEQ;
4298 4314 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4299 4315 idva.va_seq = 0;
4300 4316
4301 4317 /*
4302 4318 * Force modified data and metadata out to stable storage.
4303 4319 */
4304 4320 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4305 4321
4306 4322 /*
4307 4323 * Get "after" change value, if it fails, simply return the
4308 4324 * before value.
4309 4325 */
4310 4326 adva.va_mask = AT_CTIME|AT_SEQ;
4311 4327 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4312 4328 adva.va_ctime = bdva.va_ctime;
4313 4329 adva.va_seq = 0;
4314 4330 }
4315 4331
4316 4332 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4317 4333
4318 4334 /*
4319 4335 * The cinfo.atomic = TRUE only if we have
4320 4336 * non-zero va_seq's, and it has incremented by exactly one
4321 4337 * during the VOP_REMOVE/RMDIR and it didn't change during
4322 4338 * the VOP_FSYNC.
4323 4339 */
4324 4340 if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4325 4341 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4326 4342 resp->cinfo.atomic = TRUE;
4327 4343 else
4328 4344 resp->cinfo.atomic = FALSE;
4329 4345
4330 4346 *cs->statusp = resp->status = NFS4_OK;
4331 4347
4332 4348 out:
4333 4349 DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4334 4350 REMOVE4res *, resp);
4335 4351 }
4336 4352
4337 4353 /*
4338 4354 * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4339 4355 * oldname and newname.
4340 4356 * res: status. If success - CURRENT_FH unchanged, return change_info
4341 4357 * for both from and target directories.
4342 4358 */
4343 4359 /* ARGSUSED */
4344 4360 static void
4345 4361 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4346 4362 struct compound_state *cs)
4347 4363 {
4348 4364 RENAME4args *args = &argop->nfs_argop4_u.oprename;
4349 4365 RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4350 4366 int error;
4351 4367 vnode_t *odvp;
4352 4368 vnode_t *ndvp;
4353 4369 vnode_t *srcvp, *targvp, *tvp;
4354 4370 struct vattr obdva, oidva, oadva;
4355 4371 struct vattr nbdva, nidva, nadva;
4356 4372 char *onm, *nnm;
4357 4373 uint_t olen, nlen;
4358 4374 rfs4_file_t *fp, *sfp;
4359 4375 int in_crit_src, in_crit_targ;
4360 4376 int fp_rele_grant_hold, sfp_rele_grant_hold;
4361 4377 int unlinked;
4362 4378 bslabel_t *clabel;
4363 4379 struct sockaddr *ca;
4364 4380 char *converted_onm = NULL;
4365 4381 char *converted_nnm = NULL;
4366 4382 nfsstat4 status;
4367 4383
4368 4384 DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4369 4385 RENAME4args *, args);
4370 4386
4371 4387 fp = sfp = NULL;
4372 4388 srcvp = targvp = tvp = NULL;
4373 4389 in_crit_src = in_crit_targ = 0;
4374 4390 fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4375 4391 unlinked = 0;
4376 4392
4377 4393 /* CURRENT_FH: target directory */
4378 4394 ndvp = cs->vp;
4379 4395 if (ndvp == NULL) {
4380 4396 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4381 4397 goto out;
4382 4398 }
4383 4399
4384 4400 /* SAVED_FH: from directory */
4385 4401 odvp = cs->saved_vp;
4386 4402 if (odvp == NULL) {
4387 4403 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4388 4404 goto out;
4389 4405 }
4390 4406
4391 4407 if (cs->access == CS_ACCESS_DENIED) {
4392 4408 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4393 4409 goto out;
4394 4410 }
4395 4411
4396 4412 /*
4397 4413 * If there is an unshared filesystem mounted on this vnode,
4398 4414 * do not allow to rename objects in this directory.
4399 4415 */
4400 4416 if (vn_ismntpt(odvp)) {
4401 4417 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4402 4418 goto out;
4403 4419 }
4404 4420
4405 4421 /*
4406 4422 * If there is an unshared filesystem mounted on this vnode,
4407 4423 * do not allow to rename to this directory.
4408 4424 */
4409 4425 if (vn_ismntpt(ndvp)) {
4410 4426 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4411 4427 goto out;
4412 4428 }
4413 4429
4414 4430 if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4415 4431 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4416 4432 goto out;
4417 4433 }
4418 4434
4419 4435 if (cs->saved_exi != cs->exi) {
4420 4436 *cs->statusp = resp->status = NFS4ERR_XDEV;
4421 4437 goto out;
4422 4438 }
4423 4439
4424 4440 status = utf8_dir_verify(&args->oldname);
4425 4441 if (status != NFS4_OK) {
4426 4442 *cs->statusp = resp->status = status;
4427 4443 goto out;
4428 4444 }
4429 4445
4430 4446 status = utf8_dir_verify(&args->newname);
4431 4447 if (status != NFS4_OK) {
4432 4448 *cs->statusp = resp->status = status;
4433 4449 goto out;
4434 4450 }
4435 4451
4436 4452 onm = utf8_to_fn(&args->oldname, &olen, NULL);
4437 4453 if (onm == NULL) {
4438 4454 *cs->statusp = resp->status = NFS4ERR_INVAL;
4439 4455 goto out;
4440 4456 }
4441 4457 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4442 4458 nlen = MAXPATHLEN + 1;
4443 4459 converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4444 4460 nlen);
4445 4461
4446 4462 if (converted_onm == NULL) {
4447 4463 *cs->statusp = resp->status = NFS4ERR_INVAL;
4448 4464 kmem_free(onm, olen);
4449 4465 goto out;
4450 4466 }
4451 4467
4452 4468 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4453 4469 if (nnm == NULL) {
4454 4470 *cs->statusp = resp->status = NFS4ERR_INVAL;
4455 4471 if (onm != converted_onm)
4456 4472 kmem_free(converted_onm, MAXPATHLEN + 1);
4457 4473 kmem_free(onm, olen);
4458 4474 goto out;
4459 4475 }
4460 4476 converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4461 4477 MAXPATHLEN + 1);
4462 4478
4463 4479 if (converted_nnm == NULL) {
4464 4480 *cs->statusp = resp->status = NFS4ERR_INVAL;
4465 4481 kmem_free(nnm, nlen);
4466 4482 nnm = NULL;
4467 4483 if (onm != converted_onm)
4468 4484 kmem_free(converted_onm, MAXPATHLEN + 1);
4469 4485 kmem_free(onm, olen);
4470 4486 goto out;
4471 4487 }
4472 4488
4473 4489
4474 4490 if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4475 4491 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4476 4492 kmem_free(onm, olen);
4477 4493 kmem_free(nnm, nlen);
4478 4494 goto out;
4479 4495 }
4480 4496
4481 4497
4482 4498 if (rdonly4(req, cs)) {
4483 4499 *cs->statusp = resp->status = NFS4ERR_ROFS;
4484 4500 if (onm != converted_onm)
4485 4501 kmem_free(converted_onm, MAXPATHLEN + 1);
4486 4502 kmem_free(onm, olen);
4487 4503 if (nnm != converted_nnm)
4488 4504 kmem_free(converted_nnm, MAXPATHLEN + 1);
4489 4505 kmem_free(nnm, nlen);
4490 4506 goto out;
4491 4507 }
4492 4508
4493 4509 /* check label of the target dir */
4494 4510 if (is_system_labeled()) {
4495 4511 ASSERT(req->rq_label != NULL);
4496 4512 clabel = req->rq_label;
4497 4513 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4498 4514 "got client label from request(1)",
4499 4515 struct svc_req *, req);
4500 4516 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4501 4517 if (!do_rfs_label_check(clabel, ndvp,
4502 4518 EQUALITY_CHECK, cs->exi)) {
4503 4519 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4504 4520 goto err_out;
4505 4521 }
4506 4522 }
4507 4523 }
4508 4524
4509 4525 /*
4510 4526 * Is the source a file and have a delegation?
4511 4527 * We don't need to acquire va_seq before these lookups, if
4512 4528 * it causes an update, cinfo.before will not match, which will
4513 4529 * trigger a cache flush even if atomic is TRUE.
4514 4530 */
4515 4531 if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4516 4532 &error, cs->cr)) {
4517 4533 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4518 4534 NULL)) {
4519 4535 *cs->statusp = resp->status = NFS4ERR_DELAY;
4520 4536 goto err_out;
4521 4537 }
4522 4538 }
4523 4539
4524 4540 if (srcvp == NULL) {
4525 4541 *cs->statusp = resp->status = puterrno4(error);
4526 4542 if (onm != converted_onm)
4527 4543 kmem_free(converted_onm, MAXPATHLEN + 1);
4528 4544 kmem_free(onm, olen);
4529 4545 if (nnm != converted_nnm)
4530 4546 kmem_free(converted_nnm, MAXPATHLEN + 1);
4531 4547 kmem_free(nnm, nlen);
4532 4548 goto out;
4533 4549 }
4534 4550
4535 4551 sfp_rele_grant_hold = 1;
4536 4552
4537 4553 /* Does the destination exist and a file and have a delegation? */
4538 4554 if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4539 4555 NULL, cs->cr)) {
4540 4556 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4541 4557 NULL)) {
4542 4558 *cs->statusp = resp->status = NFS4ERR_DELAY;
4543 4559 goto err_out;
4544 4560 }
4545 4561 }
4546 4562 fp_rele_grant_hold = 1;
4547 4563
4548 4564 /* Check for NBMAND lock on both source and target */
4549 4565 if (nbl_need_check(srcvp)) {
4550 4566 nbl_start_crit(srcvp, RW_READER);
4551 4567 in_crit_src = 1;
4552 4568 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4553 4569 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4554 4570 goto err_out;
4555 4571 }
4556 4572 }
4557 4573
4558 4574 if (targvp && nbl_need_check(targvp)) {
4559 4575 nbl_start_crit(targvp, RW_READER);
4560 4576 in_crit_targ = 1;
4561 4577 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4562 4578 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4563 4579 goto err_out;
4564 4580 }
4565 4581 }
4566 4582
4567 4583 /* Get source "before" change value */
4568 4584 obdva.va_mask = AT_CTIME|AT_SEQ;
4569 4585 error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4570 4586 if (!error) {
4571 4587 nbdva.va_mask = AT_CTIME|AT_SEQ;
4572 4588 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4573 4589 }
4574 4590 if (error) {
4575 4591 *cs->statusp = resp->status = puterrno4(error);
4576 4592 goto err_out;
4577 4593 }
4578 4594
4579 4595 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4580 4596 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4581 4597
4582 4598 error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr,
4583 4599 NULL, 0);
4584 4600
4585 4601 /*
4586 4602 * If target existed and was unlinked by VOP_RENAME, state will need
4587 4603 * closed. To avoid deadlock, rfs4_close_all_state will be done after
4588 4604 * any necessary nbl_end_crit on srcvp and tgtvp.
4589 4605 */
4590 4606 if (error == 0 && fp != NULL) {
4591 4607 rfs4_dbe_lock(fp->rf_dbe);
4592 4608 tvp = fp->rf_vp;
4593 4609 if (tvp)
4594 4610 VN_HOLD(tvp);
4595 4611 rfs4_dbe_unlock(fp->rf_dbe);
4596 4612
4597 4613 if (tvp) {
4598 4614 struct vattr va;
4599 4615 va.va_mask = AT_NLINK;
4600 4616
4601 4617 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4602 4618 va.va_nlink == 0) {
4603 4619 unlinked = 1;
4604 4620
4605 4621 /* DEBUG data */
4606 4622 if ((srcvp == targvp) || (tvp != targvp)) {
4607 4623 cmn_err(CE_WARN, "rfs4_op_rename: "
4608 4624 "srcvp %p, targvp: %p, tvp: %p",
4609 4625 (void *)srcvp, (void *)targvp,
4610 4626 (void *)tvp);
4611 4627 }
4612 4628 } else {
4613 4629 VN_RELE(tvp);
4614 4630 }
4615 4631 }
4616 4632 }
4617 4633 if (error == 0)
4618 4634 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4619 4635
4620 4636 if (in_crit_src)
4621 4637 nbl_end_crit(srcvp);
4622 4638 if (srcvp)
4623 4639 VN_RELE(srcvp);
4624 4640 if (in_crit_targ)
4625 4641 nbl_end_crit(targvp);
4626 4642 if (targvp)
4627 4643 VN_RELE(targvp);
4628 4644
4629 4645 if (unlinked) {
4630 4646 ASSERT(fp != NULL);
4631 4647 ASSERT(tvp != NULL);
4632 4648
4633 4649 /* DEBUG data */
4634 4650 if (RW_READ_HELD(&tvp->v_nbllock)) {
4635 4651 cmn_err(CE_WARN, "rfs4_op_rename: "
4636 4652 "RW_READ_HELD(%p)", (void *)tvp);
4637 4653 }
4638 4654
4639 4655 /* The file is gone and so should the state */
4640 4656 rfs4_close_all_state(fp);
4641 4657 VN_RELE(tvp);
4642 4658 }
4643 4659
4644 4660 if (sfp) {
4645 4661 rfs4_clear_dont_grant(sfp);
4646 4662 rfs4_file_rele(sfp);
4647 4663 }
4648 4664 if (fp) {
4649 4665 rfs4_clear_dont_grant(fp);
4650 4666 rfs4_file_rele(fp);
4651 4667 }
4652 4668
4653 4669 if (converted_onm != onm)
4654 4670 kmem_free(converted_onm, MAXPATHLEN + 1);
4655 4671 kmem_free(onm, olen);
4656 4672 if (converted_nnm != nnm)
4657 4673 kmem_free(converted_nnm, MAXPATHLEN + 1);
4658 4674 kmem_free(nnm, nlen);
4659 4675
4660 4676 /*
4661 4677 * Get the initial "after" sequence number, if it fails, set to zero
4662 4678 */
4663 4679 oidva.va_mask = AT_SEQ;
4664 4680 if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4665 4681 oidva.va_seq = 0;
4666 4682
4667 4683 nidva.va_mask = AT_SEQ;
4668 4684 if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4669 4685 nidva.va_seq = 0;
4670 4686
4671 4687 /*
4672 4688 * Force modified data and metadata out to stable storage.
4673 4689 */
4674 4690 (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4675 4691 (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4676 4692
4677 4693 if (error) {
4678 4694 *cs->statusp = resp->status = puterrno4(error);
4679 4695 goto out;
4680 4696 }
4681 4697
4682 4698 /*
4683 4699 * Get "after" change values, if it fails, simply return the
4684 4700 * before value.
4685 4701 */
4686 4702 oadva.va_mask = AT_CTIME|AT_SEQ;
4687 4703 if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4688 4704 oadva.va_ctime = obdva.va_ctime;
4689 4705 oadva.va_seq = 0;
4690 4706 }
4691 4707
4692 4708 nadva.va_mask = AT_CTIME|AT_SEQ;
4693 4709 if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4694 4710 nadva.va_ctime = nbdva.va_ctime;
4695 4711 nadva.va_seq = 0;
4696 4712 }
4697 4713
4698 4714 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4699 4715 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4700 4716
4701 4717 /*
4702 4718 * The cinfo.atomic = TRUE only if we have
4703 4719 * non-zero va_seq's, and it has incremented by exactly one
4704 4720 * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4705 4721 */
4706 4722 if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4707 4723 oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4708 4724 resp->source_cinfo.atomic = TRUE;
4709 4725 else
4710 4726 resp->source_cinfo.atomic = FALSE;
4711 4727
4712 4728 if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4713 4729 nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4714 4730 resp->target_cinfo.atomic = TRUE;
4715 4731 else
4716 4732 resp->target_cinfo.atomic = FALSE;
4717 4733
4718 4734 #ifdef VOLATILE_FH_TEST
4719 4735 {
4720 4736 extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4721 4737
4722 4738 /*
4723 4739 * Add the renamed file handle to the volatile rename list
4724 4740 */
4725 4741 if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4726 4742 /* file handles may expire on rename */
4727 4743 vnode_t *vp;
4728 4744
4729 4745 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4730 4746 /*
4731 4747 * Already know that nnm will be a valid string
4732 4748 */
4733 4749 error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4734 4750 NULL, NULL, NULL);
4735 4751 kmem_free(nnm, nlen);
4736 4752 if (!error) {
4737 4753 add_volrnm_fh(cs->exi, vp);
4738 4754 VN_RELE(vp);
4739 4755 }
4740 4756 }
4741 4757 }
4742 4758 #endif /* VOLATILE_FH_TEST */
4743 4759
4744 4760 *cs->statusp = resp->status = NFS4_OK;
4745 4761 out:
4746 4762 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4747 4763 RENAME4res *, resp);
4748 4764 return;
4749 4765
4750 4766 err_out:
4751 4767 if (onm != converted_onm)
4752 4768 kmem_free(converted_onm, MAXPATHLEN + 1);
4753 4769 if (onm != NULL)
4754 4770 kmem_free(onm, olen);
4755 4771 if (nnm != converted_nnm)
4756 4772 kmem_free(converted_nnm, MAXPATHLEN + 1);
4757 4773 if (nnm != NULL)
4758 4774 kmem_free(nnm, nlen);
4759 4775
4760 4776 if (in_crit_src) nbl_end_crit(srcvp);
4761 4777 if (in_crit_targ) nbl_end_crit(targvp);
4762 4778 if (targvp) VN_RELE(targvp);
4763 4779 if (srcvp) VN_RELE(srcvp);
4764 4780 if (sfp) {
4765 4781 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4766 4782 rfs4_file_rele(sfp);
4767 4783 }
4768 4784 if (fp) {
4769 4785 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4770 4786 rfs4_file_rele(fp);
4771 4787 }
4772 4788
4773 4789 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4774 4790 RENAME4res *, resp);
4775 4791 }
4776 4792
4777 4793 /* ARGSUSED */
4778 4794 static void
4779 4795 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4780 4796 struct compound_state *cs)
4781 4797 {
4782 4798 RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4783 4799 RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4784 4800 rfs4_client_t *cp;
4785 4801
4786 4802 DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4787 4803 RENEW4args *, args);
4788 4804
4789 4805 if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4790 4806 *cs->statusp = resp->status =
4791 4807 rfs4_check_clientid(&args->clientid, 0);
4792 4808 goto out;
4793 4809 }
4794 4810
4795 4811 if (rfs4_lease_expired(cp)) {
4796 4812 rfs4_client_rele(cp);
4797 4813 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4798 4814 goto out;
4799 4815 }
4800 4816
4801 4817 rfs4_update_lease(cp);
4802 4818
4803 4819 mutex_enter(cp->rc_cbinfo.cb_lock);
4804 4820 if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4805 4821 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4806 4822 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4807 4823 } else {
4808 4824 *cs->statusp = resp->status = NFS4_OK;
4809 4825 }
4810 4826 mutex_exit(cp->rc_cbinfo.cb_lock);
4811 4827
4812 4828 rfs4_client_rele(cp);
4813 4829
4814 4830 out:
4815 4831 DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4816 4832 RENEW4res *, resp);
4817 4833 }
4818 4834
4819 4835 /* ARGSUSED */
4820 4836 static void
4821 4837 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4822 4838 struct compound_state *cs)
4823 4839 {
4824 4840 RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4825 4841
4826 4842 DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4827 4843
4828 4844 /* No need to check cs->access - we are not accessing any object */
4829 4845 if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4830 4846 *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4831 4847 goto out;
4832 4848 }
4833 4849 if (cs->vp != NULL) {
4834 4850 VN_RELE(cs->vp);
4835 4851 }
4836 4852 cs->vp = cs->saved_vp;
4837 4853 cs->saved_vp = NULL;
4838 4854 cs->exi = cs->saved_exi;
4839 4855 nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4840 4856 *cs->statusp = resp->status = NFS4_OK;
4841 4857 cs->deleg = FALSE;
4842 4858
4843 4859 out:
4844 4860 DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4845 4861 RESTOREFH4res *, resp);
4846 4862 }
4847 4863
4848 4864 /* ARGSUSED */
4849 4865 static void
4850 4866 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4851 4867 struct compound_state *cs)
4852 4868 {
4853 4869 SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4854 4870
4855 4871 DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4856 4872
4857 4873 /* No need to check cs->access - we are not accessing any object */
4858 4874 if (cs->vp == NULL) {
4859 4875 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4860 4876 goto out;
4861 4877 }
4862 4878 if (cs->saved_vp != NULL) {
4863 4879 VN_RELE(cs->saved_vp);
4864 4880 }
4865 4881 cs->saved_vp = cs->vp;
4866 4882 VN_HOLD(cs->saved_vp);
4867 4883 cs->saved_exi = cs->exi;
4868 4884 /*
4869 4885 * since SAVEFH is fairly rare, don't alloc space for its fh
4870 4886 * unless necessary.
4871 4887 */
4872 4888 if (cs->saved_fh.nfs_fh4_val == NULL) {
4873 4889 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4874 4890 }
4875 4891 nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4876 4892 *cs->statusp = resp->status = NFS4_OK;
4877 4893
4878 4894 out:
4879 4895 DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4880 4896 SAVEFH4res *, resp);
4881 4897 }
4882 4898
4883 4899 /*
4884 4900 * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4885 4901 * return the bitmap of attrs that were set successfully. It is also
4886 4902 * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4887 4903 * always be called only after rfs4_do_set_attrs().
4888 4904 *
4889 4905 * Verify that the attributes are same as the expected ones. sargp->vap
4890 4906 * and sargp->sbp contain the input attributes as translated from fattr4.
4891 4907 *
4892 4908 * This function verifies only the attrs that correspond to a vattr or
4893 4909 * vfsstat struct. That is because of the extra step needed to get the
4894 4910 * corresponding system structs. Other attributes have already been set or
4895 4911 * verified by do_rfs4_set_attrs.
4896 4912 *
4897 4913 * Return 0 if all attrs match, -1 if some don't, error if error processing.
4898 4914 */
4899 4915 static int
4900 4916 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4901 4917 bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4902 4918 {
4903 4919 int error, ret_error = 0;
4904 4920 int i, k;
4905 4921 uint_t sva_mask = sargp->vap->va_mask;
4906 4922 uint_t vbit;
4907 4923 union nfs4_attr_u *na;
4908 4924 uint8_t *amap;
4909 4925 bool_t getsb = ntovp->vfsstat;
4910 4926
4911 4927 if (sva_mask != 0) {
4912 4928 /*
4913 4929 * Okay to overwrite sargp->vap because we verify based
4914 4930 * on the incoming values.
4915 4931 */
4916 4932 ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4917 4933 sargp->cs->cr, NULL);
4918 4934 if (ret_error) {
4919 4935 if (resp == NULL)
4920 4936 return (ret_error);
4921 4937 /*
4922 4938 * Must return bitmap of successful attrs
4923 4939 */
4924 4940 sva_mask = 0; /* to prevent checking vap later */
4925 4941 } else {
4926 4942 /*
4927 4943 * Some file systems clobber va_mask. it is probably
4928 4944 * wrong of them to do so, nonethless we practice
4929 4945 * defensive coding.
4930 4946 * See bug id 4276830.
4931 4947 */
4932 4948 sargp->vap->va_mask = sva_mask;
4933 4949 }
4934 4950 }
4935 4951
4936 4952 if (getsb) {
4937 4953 /*
4938 4954 * Now get the superblock and loop on the bitmap, as there is
4939 4955 * no simple way of translating from superblock to bitmap4.
4940 4956 */
4941 4957 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4942 4958 if (ret_error) {
4943 4959 if (resp == NULL)
4944 4960 goto errout;
4945 4961 getsb = FALSE;
4946 4962 }
4947 4963 }
4948 4964
4949 4965 /*
4950 4966 * Now loop and verify each attribute which getattr returned
4951 4967 * whether it's the same as the input.
4952 4968 */
4953 4969 if (resp == NULL && !getsb && (sva_mask == 0))
4954 4970 goto errout;
4955 4971
4956 4972 na = ntovp->na;
4957 4973 amap = ntovp->amap;
4958 4974 k = 0;
4959 4975 for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4960 4976 k = *amap;
4961 4977 ASSERT(nfs4_ntov_map[k].nval == k);
4962 4978 vbit = nfs4_ntov_map[k].vbit;
4963 4979
4964 4980 /*
4965 4981 * If vattr attribute but VOP_GETATTR failed, or it's
4966 4982 * superblock attribute but VFS_STATVFS failed, skip
4967 4983 */
4968 4984 if (vbit) {
4969 4985 if ((vbit & sva_mask) == 0)
4970 4986 continue;
4971 4987 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4972 4988 continue;
4973 4989 }
4974 4990 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
4975 4991 if (resp != NULL) {
4976 4992 if (error)
4977 4993 ret_error = -1; /* not all match */
4978 4994 else /* update response bitmap */
4979 4995 *resp |= nfs4_ntov_map[k].fbit;
4980 4996 continue;
4981 4997 }
4982 4998 if (error) {
4983 4999 ret_error = -1; /* not all match */
4984 5000 break;
4985 5001 }
4986 5002 }
4987 5003 errout:
4988 5004 return (ret_error);
4989 5005 }
4990 5006
4991 5007 /*
4992 5008 * Decode the attribute to be set/verified. If the attr requires a sys op
4993 5009 * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
4994 5010 * call the sv_getit function for it, because the sys op hasn't yet been done.
4995 5011 * Return 0 for success, error code if failed.
4996 5012 *
4997 5013 * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
4998 5014 */
4999 5015 static int
5000 5016 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
5001 5017 int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
5002 5018 {
5003 5019 int error = 0;
5004 5020 bool_t set_later;
5005 5021
5006 5022 sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
5007 5023
5008 5024 if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
5009 5025 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
5010 5026 /*
5011 5027 * don't verify yet if a vattr or sb dependent attr,
5012 5028 * because we don't have their sys values yet.
5013 5029 * Will be done later.
5014 5030 */
5015 5031 if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
5016 5032 /*
5017 5033 * ACLs are a special case, since setting the MODE
5018 5034 * conflicts with setting the ACL. We delay setting
5019 5035 * the ACL until all other attributes have been set.
5020 5036 * The ACL gets set in do_rfs4_op_setattr().
5021 5037 */
5022 5038 if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5023 5039 error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5024 5040 sargp, nap);
5025 5041 if (error) {
5026 5042 xdr_free(nfs4_ntov_map[k].xfunc,
5027 5043 (caddr_t)nap);
5028 5044 }
5029 5045 }
5030 5046 }
5031 5047 } else {
5032 5048 #ifdef DEBUG
5033 5049 cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5034 5050 "decoding attribute %d\n", k);
5035 5051 #endif
5036 5052 error = EINVAL;
5037 5053 }
5038 5054 if (!error && resp_bval && !set_later) {
5039 5055 *resp_bval |= nfs4_ntov_map[k].fbit;
5040 5056 }
5041 5057
5042 5058 return (error);
5043 5059 }
5044 5060
5045 5061 /*
5046 5062 * Set vattr based on incoming fattr4 attrs - used by setattr.
5047 5063 * Set response mask. Ignore any values that are not writable vattr attrs.
5048 5064 */
5049 5065 static nfsstat4
5050 5066 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5051 5067 struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5052 5068 nfs4_attr_cmd_t cmd)
5053 5069 {
5054 5070 int error = 0;
5055 5071 int i;
5056 5072 char *attrs = fattrp->attrlist4;
5057 5073 uint32_t attrslen = fattrp->attrlist4_len;
5058 5074 XDR xdr;
5059 5075 nfsstat4 status = NFS4_OK;
5060 5076 vnode_t *vp = cs->vp;
5061 5077 union nfs4_attr_u *na;
5062 5078 uint8_t *amap;
5063 5079
5064 5080 #ifndef lint
5065 5081 /*
5066 5082 * Make sure that maximum attribute number can be expressed as an
5067 5083 * 8 bit quantity.
5068 5084 */
5069 5085 ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5070 5086 #endif
5071 5087
5072 5088 if (vp == NULL) {
5073 5089 if (resp)
5074 5090 *resp = 0;
5075 5091 return (NFS4ERR_NOFILEHANDLE);
5076 5092 }
5077 5093 if (cs->access == CS_ACCESS_DENIED) {
5078 5094 if (resp)
5079 5095 *resp = 0;
5080 5096 return (NFS4ERR_ACCESS);
5081 5097 }
5082 5098
5083 5099 sargp->op = cmd;
5084 5100 sargp->cs = cs;
5085 5101 sargp->flag = 0; /* may be set later */
5086 5102 sargp->vap->va_mask = 0;
5087 5103 sargp->rdattr_error = NFS4_OK;
5088 5104 sargp->rdattr_error_req = FALSE;
5089 5105 /* sargp->sbp is set by the caller */
5090 5106
5091 5107 xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5092 5108
5093 5109 na = ntovp->na;
5094 5110 amap = ntovp->amap;
5095 5111
5096 5112 /*
5097 5113 * The following loop iterates on the nfs4_ntov_map checking
5098 5114 * if the fbit is set in the requested bitmap.
5099 5115 * If set then we process the arguments using the
5100 5116 * rfs4_fattr4 conversion functions to populate the setattr
5101 5117 * vattr and va_mask. Any settable attrs that are not using vattr
5102 5118 * will be set in this loop.
5103 5119 */
5104 5120 for (i = 0; i < nfs4_ntov_map_size; i++) {
5105 5121 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5106 5122 continue;
5107 5123 }
5108 5124 /*
5109 5125 * If setattr, must be a writable attr.
5110 5126 * If verify/nverify, must be a readable attr.
5111 5127 */
5112 5128 if ((error = (*nfs4_ntov_map[i].sv_getit)(
5113 5129 NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5114 5130 /*
5115 5131 * Client tries to set/verify an
5116 5132 * unsupported attribute, tries to set
5117 5133 * a read only attr or verify a write
5118 5134 * only one - error!
5119 5135 */
5120 5136 break;
5121 5137 }
5122 5138 /*
5123 5139 * Decode the attribute to set/verify
5124 5140 */
5125 5141 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5126 5142 &xdr, resp ? resp : NULL, na);
5127 5143 if (error)
5128 5144 break;
5129 5145 *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5130 5146 na++;
5131 5147 (ntovp->attrcnt)++;
5132 5148 if (nfs4_ntov_map[i].vfsstat)
5133 5149 ntovp->vfsstat = TRUE;
5134 5150 }
5135 5151
5136 5152 if (error != 0)
5137 5153 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5138 5154 puterrno4(error));
5139 5155 /* xdrmem_destroy(&xdrs); */ /* NO-OP */
5140 5156 return (status);
5141 5157 }
5142 5158
5143 5159 static nfsstat4
5144 5160 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5145 5161 stateid4 *stateid)
5146 5162 {
5147 5163 int error = 0;
5148 5164 struct nfs4_svgetit_arg sarg;
5149 5165 bool_t trunc;
5150 5166
5151 5167 nfsstat4 status = NFS4_OK;
5152 5168 cred_t *cr = cs->cr;
5153 5169 vnode_t *vp = cs->vp;
5154 5170 struct nfs4_ntov_table ntov;
5155 5171 struct statvfs64 sb;
5156 5172 struct vattr bva;
5157 5173 struct flock64 bf;
5158 5174 int in_crit = 0;
5159 5175 uint_t saved_mask = 0;
5160 5176 caller_context_t ct;
5161 5177
5162 5178 *resp = 0;
5163 5179 sarg.sbp = &sb;
5164 5180 sarg.is_referral = B_FALSE;
5165 5181 nfs4_ntov_table_init(&ntov);
5166 5182 status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5167 5183 NFS4ATTR_SETIT);
5168 5184 if (status != NFS4_OK) {
5169 5185 /*
5170 5186 * failed set attrs
5171 5187 */
5172 5188 goto done;
5173 5189 }
5174 5190 if ((sarg.vap->va_mask == 0) &&
5175 5191 (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5176 5192 /*
5177 5193 * no further work to be done
5178 5194 */
5179 5195 goto done;
5180 5196 }
5181 5197
5182 5198 /*
5183 5199 * If we got a request to set the ACL and the MODE, only
5184 5200 * allow changing VSUID, VSGID, and VSVTX. Attempting
5185 5201 * to change any other bits, along with setting an ACL,
5186 5202 * gives NFS4ERR_INVAL.
5187 5203 */
5188 5204 if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5189 5205 (fattrp->attrmask & FATTR4_MODE_MASK)) {
5190 5206 vattr_t va;
5191 5207
5192 5208 va.va_mask = AT_MODE;
5193 5209 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5194 5210 if (error) {
5195 5211 status = puterrno4(error);
5196 5212 goto done;
5197 5213 }
5198 5214 if ((sarg.vap->va_mode ^ va.va_mode) &
5199 5215 ~(VSUID | VSGID | VSVTX)) {
5200 5216 status = NFS4ERR_INVAL;
5201 5217 goto done;
5202 5218 }
5203 5219 }
5204 5220
5205 5221 /* Check stateid only if size has been set */
5206 5222 if (sarg.vap->va_mask & AT_SIZE) {
5207 5223 trunc = (sarg.vap->va_size == 0);
5208 5224 status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5209 5225 trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
5210 5226 if (status != NFS4_OK)
5211 5227 goto done;
5212 5228 } else {
5213 5229 ct.cc_sysid = 0;
5214 5230 ct.cc_pid = 0;
5215 5231 ct.cc_caller_id = nfs4_srv_caller_id;
5216 5232 ct.cc_flags = CC_DONTBLOCK;
5217 5233 }
5218 5234
5219 5235 /* XXX start of possible race with delegations */
5220 5236
5221 5237 /*
5222 5238 * We need to specially handle size changes because it is
5223 5239 * possible for the client to create a file with read-only
5224 5240 * modes, but with the file opened for writing. If the client
5225 5241 * then tries to set the file size, e.g. ftruncate(3C),
5226 5242 * fcntl(F_FREESP), the normal access checking done in
5227 5243 * VOP_SETATTR would prevent the client from doing it even though
5228 5244 * it should be allowed to do so. To get around this, we do the
5229 5245 * access checking for ourselves and use VOP_SPACE which doesn't
5230 5246 * do the access checking.
5231 5247 * Also the client should not be allowed to change the file
5232 5248 * size if there is a conflicting non-blocking mandatory lock in
5233 5249 * the region of the change.
5234 5250 */
5235 5251 if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5236 5252 u_offset_t offset;
5237 5253 ssize_t length;
5238 5254
5239 5255 /*
5240 5256 * ufs_setattr clears AT_SIZE from vap->va_mask, but
5241 5257 * before returning, sarg.vap->va_mask is used to
5242 5258 * generate the setattr reply bitmap. We also clear
5243 5259 * AT_SIZE below before calling VOP_SPACE. For both
5244 5260 * of these cases, the va_mask needs to be saved here
5245 5261 * and restored after calling VOP_SETATTR.
5246 5262 */
5247 5263 saved_mask = sarg.vap->va_mask;
5248 5264
5249 5265 /*
5250 5266 * Check any possible conflict due to NBMAND locks.
5251 5267 * Get into critical region before VOP_GETATTR, so the
5252 5268 * size attribute is valid when checking conflicts.
5253 5269 */
5254 5270 if (nbl_need_check(vp)) {
5255 5271 nbl_start_crit(vp, RW_READER);
5256 5272 in_crit = 1;
5257 5273 }
5258 5274
5259 5275 bva.va_mask = AT_UID|AT_SIZE;
5260 5276 if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
5261 5277 status = puterrno4(error);
5262 5278 goto done;
5263 5279 }
5264 5280
5265 5281 if (in_crit) {
5266 5282 if (sarg.vap->va_size < bva.va_size) {
5267 5283 offset = sarg.vap->va_size;
5268 5284 length = bva.va_size - sarg.vap->va_size;
5269 5285 } else {
5270 5286 offset = bva.va_size;
5271 5287 length = sarg.vap->va_size - bva.va_size;
5272 5288 }
5273 5289 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5274 5290 &ct)) {
5275 5291 status = NFS4ERR_LOCKED;
5276 5292 goto done;
5277 5293 }
5278 5294 }
5279 5295
5280 5296 if (crgetuid(cr) == bva.va_uid) {
5281 5297 sarg.vap->va_mask &= ~AT_SIZE;
5282 5298 bf.l_type = F_WRLCK;
5283 5299 bf.l_whence = 0;
5284 5300 bf.l_start = (off64_t)sarg.vap->va_size;
5285 5301 bf.l_len = 0;
5286 5302 bf.l_sysid = 0;
5287 5303 bf.l_pid = 0;
5288 5304 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5289 5305 (offset_t)sarg.vap->va_size, cr, &ct);
5290 5306 }
5291 5307 }
5292 5308
5293 5309 if (!error && sarg.vap->va_mask != 0)
5294 5310 error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5295 5311
5296 5312 /* restore va_mask -- ufs_setattr clears AT_SIZE */
5297 5313 if (saved_mask & AT_SIZE)
5298 5314 sarg.vap->va_mask |= AT_SIZE;
5299 5315
5300 5316 /*
5301 5317 * If an ACL was being set, it has been delayed until now,
5302 5318 * in order to set the mode (via the VOP_SETATTR() above) first.
5303 5319 */
5304 5320 if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5305 5321 int i;
5306 5322
5307 5323 for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5308 5324 if (ntov.amap[i] == FATTR4_ACL)
5309 5325 break;
5310 5326 if (i < NFS4_MAXNUM_ATTRS) {
5311 5327 error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5312 5328 NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5313 5329 if (error == 0) {
5314 5330 *resp |= FATTR4_ACL_MASK;
5315 5331 } else if (error == ENOTSUP) {
5316 5332 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5317 5333 status = NFS4ERR_ATTRNOTSUPP;
5318 5334 goto done;
5319 5335 }
5320 5336 } else {
5321 5337 NFS4_DEBUG(rfs4_debug,
5322 5338 (CE_NOTE, "do_rfs4_op_setattr: "
5323 5339 "unable to find ACL in fattr4"));
5324 5340 error = EINVAL;
5325 5341 }
5326 5342 }
5327 5343
5328 5344 if (error) {
5329 5345 /* check if a monitor detected a delegation conflict */
5330 5346 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5331 5347 status = NFS4ERR_DELAY;
5332 5348 else
5333 5349 status = puterrno4(error);
5334 5350
5335 5351 /*
5336 5352 * Set the response bitmap when setattr failed.
5337 5353 * If VOP_SETATTR partially succeeded, test by doing a
5338 5354 * VOP_GETATTR on the object and comparing the data
5339 5355 * to the setattr arguments.
5340 5356 */
5341 5357 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5342 5358 } else {
5343 5359 /*
5344 5360 * Force modified metadata out to stable storage.
5345 5361 */
5346 5362 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5347 5363 /*
5348 5364 * Set response bitmap
5349 5365 */
5350 5366 nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5351 5367 }
5352 5368
5353 5369 /* Return early and already have a NFSv4 error */
5354 5370 done:
5355 5371 /*
5356 5372 * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5357 5373 * conversion sets both readable and writeable NFS4 attrs
5358 5374 * for AT_MTIME and AT_ATIME. The line below masks out
5359 5375 * unrequested attrs from the setattr result bitmap. This
5360 5376 * is placed after the done: label to catch the ATTRNOTSUP
5361 5377 * case.
5362 5378 */
5363 5379 *resp &= fattrp->attrmask;
5364 5380
5365 5381 if (in_crit)
5366 5382 nbl_end_crit(vp);
5367 5383
5368 5384 nfs4_ntov_table_free(&ntov, &sarg);
5369 5385
5370 5386 return (status);
5371 5387 }
5372 5388
5373 5389 /* ARGSUSED */
5374 5390 static void
5375 5391 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5376 5392 struct compound_state *cs)
5377 5393 {
5378 5394 SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5379 5395 SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5380 5396 bslabel_t *clabel;
5381 5397
5382 5398 DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5383 5399 SETATTR4args *, args);
5384 5400
5385 5401 if (cs->vp == NULL) {
5386 5402 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5387 5403 goto out;
5388 5404 }
5389 5405
5390 5406 /*
5391 5407 * If there is an unshared filesystem mounted on this vnode,
5392 5408 * do not allow to setattr on this vnode.
5393 5409 */
5394 5410 if (vn_ismntpt(cs->vp)) {
5395 5411 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5396 5412 goto out;
5397 5413 }
5398 5414
5399 5415 resp->attrsset = 0;
5400 5416
5401 5417 if (rdonly4(req, cs)) {
5402 5418 *cs->statusp = resp->status = NFS4ERR_ROFS;
5403 5419 goto out;
5404 5420 }
5405 5421
5406 5422 /* check label before setting attributes */
5407 5423 if (is_system_labeled()) {
5408 5424 ASSERT(req->rq_label != NULL);
5409 5425 clabel = req->rq_label;
5410 5426 DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5411 5427 "got client label from request(1)",
5412 5428 struct svc_req *, req);
5413 5429 if (!blequal(&l_admin_low->tsl_label, clabel)) {
5414 5430 if (!do_rfs_label_check(clabel, cs->vp,
5415 5431 EQUALITY_CHECK, cs->exi)) {
5416 5432 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5417 5433 goto out;
5418 5434 }
5419 5435 }
5420 5436 }
5421 5437
5422 5438 *cs->statusp = resp->status =
5423 5439 do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5424 5440 &args->stateid);
5425 5441
5426 5442 out:
5427 5443 DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5428 5444 SETATTR4res *, resp);
5429 5445 }
5430 5446
5431 5447 /* ARGSUSED */
5432 5448 static void
5433 5449 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5434 5450 struct compound_state *cs)
5435 5451 {
5436 5452 /*
5437 5453 * verify and nverify are exactly the same, except that nverify
5438 5454 * succeeds when some argument changed, and verify succeeds when
5439 5455 * when none changed.
5440 5456 */
5441 5457
5442 5458 VERIFY4args *args = &argop->nfs_argop4_u.opverify;
5443 5459 VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5444 5460
5445 5461 int error;
5446 5462 struct nfs4_svgetit_arg sarg;
5447 5463 struct statvfs64 sb;
5448 5464 struct nfs4_ntov_table ntov;
5449 5465
5450 5466 DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5451 5467 VERIFY4args *, args);
5452 5468
5453 5469 if (cs->vp == NULL) {
5454 5470 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5455 5471 goto out;
5456 5472 }
5457 5473
5458 5474 sarg.sbp = &sb;
5459 5475 sarg.is_referral = B_FALSE;
5460 5476 nfs4_ntov_table_init(&ntov);
5461 5477 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5462 5478 &sarg, &ntov, NFS4ATTR_VERIT);
5463 5479 if (resp->status != NFS4_OK) {
5464 5480 /*
5465 5481 * do_rfs4_set_attrs will try to verify systemwide attrs,
5466 5482 * so could return -1 for "no match".
5467 5483 */
5468 5484 if (resp->status == -1)
5469 5485 resp->status = NFS4ERR_NOT_SAME;
5470 5486 goto done;
5471 5487 }
5472 5488 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5473 5489 switch (error) {
5474 5490 case 0:
5475 5491 resp->status = NFS4_OK;
5476 5492 break;
5477 5493 case -1:
5478 5494 resp->status = NFS4ERR_NOT_SAME;
5479 5495 break;
5480 5496 default:
5481 5497 resp->status = puterrno4(error);
5482 5498 break;
5483 5499 }
5484 5500 done:
5485 5501 *cs->statusp = resp->status;
5486 5502 nfs4_ntov_table_free(&ntov, &sarg);
5487 5503 out:
5488 5504 DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5489 5505 VERIFY4res *, resp);
5490 5506 }
5491 5507
5492 5508 /* ARGSUSED */
5493 5509 static void
5494 5510 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5495 5511 struct compound_state *cs)
5496 5512 {
5497 5513 /*
5498 5514 * verify and nverify are exactly the same, except that nverify
5499 5515 * succeeds when some argument changed, and verify succeeds when
5500 5516 * when none changed.
5501 5517 */
5502 5518
5503 5519 NVERIFY4args *args = &argop->nfs_argop4_u.opnverify;
5504 5520 NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5505 5521
5506 5522 int error;
5507 5523 struct nfs4_svgetit_arg sarg;
5508 5524 struct statvfs64 sb;
5509 5525 struct nfs4_ntov_table ntov;
5510 5526
5511 5527 DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5512 5528 NVERIFY4args *, args);
5513 5529
5514 5530 if (cs->vp == NULL) {
5515 5531 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5516 5532 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5517 5533 NVERIFY4res *, resp);
5518 5534 return;
5519 5535 }
5520 5536 sarg.sbp = &sb;
5521 5537 sarg.is_referral = B_FALSE;
5522 5538 nfs4_ntov_table_init(&ntov);
5523 5539 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5524 5540 &sarg, &ntov, NFS4ATTR_VERIT);
5525 5541 if (resp->status != NFS4_OK) {
5526 5542 /*
5527 5543 * do_rfs4_set_attrs will try to verify systemwide attrs,
5528 5544 * so could return -1 for "no match".
5529 5545 */
5530 5546 if (resp->status == -1)
5531 5547 resp->status = NFS4_OK;
5532 5548 goto done;
5533 5549 }
5534 5550 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5535 5551 switch (error) {
5536 5552 case 0:
5537 5553 resp->status = NFS4ERR_SAME;
5538 5554 break;
5539 5555 case -1:
5540 5556 resp->status = NFS4_OK;
5541 5557 break;
5542 5558 default:
5543 5559 resp->status = puterrno4(error);
5544 5560 break;
5545 5561 }
5546 5562 done:
5547 5563 *cs->statusp = resp->status;
5548 5564 nfs4_ntov_table_free(&ntov, &sarg);
5549 5565
5550 5566 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5551 5567 NVERIFY4res *, resp);
5552 5568 }
5553 5569
5554 5570 /*
5555 5571 * XXX - This should live in an NFS header file.
5556 5572 */
5557 5573 #define MAX_IOVECS 12
5558 5574
5559 5575 /* ARGSUSED */
5560 5576 static void
5561 5577 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5562 5578 struct compound_state *cs)
5563 5579 {
5564 5580 WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5565 5581 WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5566 5582 int error;
5567 5583 vnode_t *vp;
5568 5584 struct vattr bva;
5569 5585 u_offset_t rlimit;
5570 5586 struct uio uio;
5571 5587 struct iovec iov[MAX_IOVECS];
5572 5588 struct iovec *iovp;
5573 5589 int iovcnt;
5574 5590 int ioflag;
5575 5591 cred_t *savecred, *cr;
5576 5592 bool_t *deleg = &cs->deleg;
5577 5593 nfsstat4 stat;
5578 5594 int in_crit = 0;
5579 5595 caller_context_t ct;
5580 5596 nfs4_srv_t *nsrv4;
5581 5597
5582 5598 DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5583 5599 WRITE4args *, args);
5584 5600
5585 5601 vp = cs->vp;
5586 5602 if (vp == NULL) {
5587 5603 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5588 5604 goto out;
5589 5605 }
5590 5606 if (cs->access == CS_ACCESS_DENIED) {
5591 5607 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5592 5608 goto out;
5593 5609 }
5594 5610
5595 5611 cr = cs->cr;
5596 5612
5597 5613 if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5598 5614 deleg, TRUE, &ct)) != NFS4_OK) {
5599 5615 *cs->statusp = resp->status = stat;
5600 5616 goto out;
5601 5617 }
5602 5618
5603 5619 /*
5604 5620 * We have to enter the critical region before calling VOP_RWLOCK
5605 5621 * to avoid a deadlock with ufs.
5606 5622 */
5607 5623 if (nbl_need_check(vp)) {
5608 5624 nbl_start_crit(vp, RW_READER);
5609 5625 in_crit = 1;
5610 5626 if (nbl_conflict(vp, NBL_WRITE,
5611 5627 args->offset, args->data_len, 0, &ct)) {
5612 5628 *cs->statusp = resp->status = NFS4ERR_LOCKED;
5613 5629 goto out;
5614 5630 }
5615 5631 }
5616 5632
5617 5633 bva.va_mask = AT_MODE | AT_UID;
5618 5634 error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5619 5635
5620 5636 /*
5621 5637 * If we can't get the attributes, then we can't do the
5622 5638 * right access checking. So, we'll fail the request.
5623 5639 */
5624 5640 if (error) {
5625 5641 *cs->statusp = resp->status = puterrno4(error);
5626 5642 goto out;
5627 5643 }
5628 5644
5629 5645 if (rdonly4(req, cs)) {
5630 5646 *cs->statusp = resp->status = NFS4ERR_ROFS;
5631 5647 goto out;
5632 5648 }
5633 5649
5634 5650 if (vp->v_type != VREG) {
5635 5651 *cs->statusp = resp->status =
5636 5652 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5637 5653 goto out;
5638 5654 }
5639 5655
5640 5656 if (crgetuid(cr) != bva.va_uid &&
5641 5657 (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5642 5658 *cs->statusp = resp->status = puterrno4(error);
5643 5659 goto out;
5644 5660 }
5645 5661
5646 5662 if (MANDLOCK(vp, bva.va_mode)) {
5647 5663 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5648 5664 goto out;
5649 5665 }
5650 5666
5651 5667 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
5652 5668 if (args->data_len == 0) {
5653 5669 *cs->statusp = resp->status = NFS4_OK;
5654 5670 resp->count = 0;
5655 5671 resp->committed = args->stable;
5656 5672 resp->writeverf = nsrv4->write4verf;
5657 5673 goto out;
5658 5674 }
5659 5675
5660 5676 if (args->mblk != NULL) {
5661 5677 mblk_t *m;
5662 5678 uint_t bytes, round_len;
5663 5679
5664 5680 iovcnt = 0;
5665 5681 bytes = 0;
5666 5682 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5667 5683 for (m = args->mblk;
5668 5684 m != NULL && bytes < round_len;
5669 5685 m = m->b_cont) {
5670 5686 iovcnt++;
5671 5687 bytes += MBLKL(m);
5672 5688 }
5673 5689 #ifdef DEBUG
5674 5690 /* should have ended on an mblk boundary */
5675 5691 if (bytes != round_len) {
5676 5692 printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5677 5693 bytes, round_len, args->data_len);
5678 5694 printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5679 5695 (void *)args->mblk, (void *)m);
5680 5696 ASSERT(bytes == round_len);
5681 5697 }
5682 5698 #endif
5683 5699 if (iovcnt <= MAX_IOVECS) {
5684 5700 iovp = iov;
5685 5701 } else {
5686 5702 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5687 5703 }
5688 5704 mblk_to_iov(args->mblk, iovcnt, iovp);
5689 5705 } else if (args->rlist != NULL) {
5690 5706 iovcnt = 1;
5691 5707 iovp = iov;
5692 5708 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5693 5709 iovp->iov_len = args->data_len;
5694 5710 } else {
5695 5711 iovcnt = 1;
5696 5712 iovp = iov;
5697 5713 iovp->iov_base = args->data_val;
5698 5714 iovp->iov_len = args->data_len;
5699 5715 }
5700 5716
5701 5717 uio.uio_iov = iovp;
5702 5718 uio.uio_iovcnt = iovcnt;
5703 5719
5704 5720 uio.uio_segflg = UIO_SYSSPACE;
5705 5721 uio.uio_extflg = UIO_COPY_DEFAULT;
5706 5722 uio.uio_loffset = args->offset;
5707 5723 uio.uio_resid = args->data_len;
5708 5724 uio.uio_llimit = curproc->p_fsz_ctl;
5709 5725 rlimit = uio.uio_llimit - args->offset;
5710 5726 if (rlimit < (u_offset_t)uio.uio_resid)
5711 5727 uio.uio_resid = (int)rlimit;
5712 5728
5713 5729 if (args->stable == UNSTABLE4)
5714 5730 ioflag = 0;
5715 5731 else if (args->stable == FILE_SYNC4)
5716 5732 ioflag = FSYNC;
5717 5733 else if (args->stable == DATA_SYNC4)
5718 5734 ioflag = FDSYNC;
5719 5735 else {
5720 5736 if (iovp != iov)
5721 5737 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5722 5738 *cs->statusp = resp->status = NFS4ERR_INVAL;
5723 5739 goto out;
5724 5740 }
5725 5741
5726 5742 /*
5727 5743 * We're changing creds because VM may fault and we need
5728 5744 * the cred of the current thread to be used if quota
5729 5745 * checking is enabled.
5730 5746 */
5731 5747 savecred = curthread->t_cred;
5732 5748 curthread->t_cred = cr;
5733 5749 error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5734 5750 curthread->t_cred = savecred;
5735 5751
5736 5752 if (iovp != iov)
5737 5753 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5738 5754
5739 5755 if (error) {
5740 5756 *cs->statusp = resp->status = puterrno4(error);
5741 5757 goto out;
5742 5758 }
5743 5759
5744 5760 *cs->statusp = resp->status = NFS4_OK;
5745 5761 resp->count = args->data_len - uio.uio_resid;
5746 5762
5747 5763 if (ioflag == 0)
5748 5764 resp->committed = UNSTABLE4;
5749 5765 else
5750 5766 resp->committed = FILE_SYNC4;
5751 5767
5752 5768 resp->writeverf = nsrv4->write4verf;
5753 5769
5754 5770 out:
5755 5771 if (in_crit)
5756 5772 nbl_end_crit(vp);
5757 5773
5758 5774 DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5759 5775 WRITE4res *, resp);
5760 5776 }
5761 5777
5762 5778
5763 5779 /* XXX put in a header file */
5764 5780 extern int sec_svc_getcred(struct svc_req *, cred_t *, caddr_t *, int *);
5765 5781
5766 5782 void
5767 5783 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5768 5784 struct svc_req *req, cred_t *cr, int *rv)
5769 5785 {
5770 5786 uint_t i;
5771 5787 struct compound_state cs;
5772 5788 nfs4_srv_t *nsrv4;
5773 5789 nfs_export_t *ne = nfs_get_export();
5774 5790
5775 5791 if (rv != NULL)
5776 5792 *rv = 0;
5777 5793 rfs4_init_compound_state(&cs);
5778 5794 /*
5779 5795 * Form a reply tag by copying over the reqeuest tag.
5780 5796 */
5781 5797 resp->tag.utf8string_val =
5782 5798 kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5783 5799 resp->tag.utf8string_len = args->tag.utf8string_len;
5784 5800 bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5785 5801 resp->tag.utf8string_len);
5786 5802
5787 5803 cs.statusp = &resp->status;
5788 5804 cs.req = req;
5789 5805 resp->array = NULL;
5790 5806 resp->array_len = 0;
5791 5807
5792 5808 /*
5793 5809 * XXX for now, minorversion should be zero
5794 5810 */
5795 5811 if (args->minorversion != NFS4_MINORVERSION) {
5796 5812 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5797 5813 &cs, COMPOUND4args *, args);
5798 5814 resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5799 5815 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5800 5816 &cs, COMPOUND4res *, resp);
5801 5817 return;
5802 5818 }
5803 5819
5804 5820 if (args->array_len == 0) {
5805 5821 resp->status = NFS4_OK;
5806 5822 return;
5807 5823 }
5808 5824
5809 5825 ASSERT(exi == NULL);
5810 5826 ASSERT(cr == NULL);
5811 5827
5812 5828 cr = crget();
5813 5829 ASSERT(cr != NULL);
5814 5830
5815 5831 if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5816 5832 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5817 5833 &cs, COMPOUND4args *, args);
5818 5834 crfree(cr);
5819 5835 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5820 5836 &cs, COMPOUND4res *, resp);
5821 5837 svcerr_badcred(req->rq_xprt);
5822 5838 if (rv != NULL)
5823 5839 *rv = 1;
5824 5840 return;
5825 5841 }
5826 5842 resp->array_len = args->array_len;
5827 5843 resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5828 5844 KM_SLEEP);
5829 5845
5830 5846 cs.basecr = cr;
5831 5847 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
5832 5848
5833 5849 DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
5834 5850 COMPOUND4args *, args);
5835 5851
5836 5852 /*
5837 5853 * For now, NFS4 compound processing must be protected by
5838 5854 * exported_lock because it can access more than one exportinfo
5839 5855 * per compound and share/unshare can now change multiple
5840 5856 * exinfo structs. The NFS2/3 code only refs 1 exportinfo
5841 5857 * per proc (excluding public exinfo), and exi_count design
5842 5858 * is sufficient to protect concurrent execution of NFS2/3
5843 5859 * ops along with unexport. This lock will be removed as
5844 5860 * part of the NFSv4 phase 2 namespace redesign work.
5845 5861 */
5846 5862 rw_enter(&ne->exported_lock, RW_READER);
5847 5863
5848 5864 /*
5849 5865 * If this is the first compound we've seen, we need to start all
5850 5866 * new instances' grace periods.
5851 5867 */
5852 5868 if (nsrv4->seen_first_compound == 0) {
5853 5869 rfs4_grace_start_new(nsrv4);
5854 5870 /*
5855 5871 * This must be set after rfs4_grace_start_new(), otherwise
5856 5872 * another thread could proceed past here before the former
5857 5873 * is finished.
5858 5874 */
5859 5875 nsrv4->seen_first_compound = 1;
5860 5876 }
5861 5877
5862 5878 for (i = 0; i < args->array_len && cs.cont; i++) {
5863 5879 nfs_argop4 *argop;
5864 5880 nfs_resop4 *resop;
5865 5881 uint_t op;
5866 5882
5867 5883 argop = &args->array[i];
5868 5884 resop = &resp->array[i];
5869 5885 resop->resop = argop->argop;
5870 5886 op = (uint_t)resop->resop;
5871 5887
5872 5888 if (op < rfsv4disp_cnt) {
5873 5889 /*
5874 5890 * Count the individual ops here; NULL and COMPOUND
5875 5891 * are counted in common_dispatch()
5876 5892 */
5877 5893 rfsproccnt_v4_ptr[op].value.ui64++;
5878 5894
5879 5895 NFS4_DEBUG(rfs4_debug > 1,
5880 5896 (CE_NOTE, "Executing %s", rfs4_op_string[op]));
5881 5897 (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5882 5898 NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
5883 5899 rfs4_op_string[op], *cs.statusp));
5884 5900 if (*cs.statusp != NFS4_OK)
5885 5901 cs.cont = FALSE;
5886 5902 } else {
5887 5903 /*
5888 5904 * This is effectively dead code since XDR code
5889 5905 * will have already returned BADXDR if op doesn't
5890 5906 * decode to legal value. This only done for a
5891 5907 * day when XDR code doesn't verify v4 opcodes.
5892 5908 */
5893 5909 op = OP_ILLEGAL;
5894 5910 rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
5895 5911
5896 5912 rfs4_op_illegal(argop, resop, req, &cs);
5897 5913 cs.cont = FALSE;
5898 5914 }
5899 5915
5900 5916 /*
5901 5917 * If not at last op, and if we are to stop, then
5902 5918 * compact the results array.
5903 5919 */
5904 5920 if ((i + 1) < args->array_len && !cs.cont) {
5905 5921 nfs_resop4 *new_res = kmem_alloc(
5906 5922 (i+1) * sizeof (nfs_resop4), KM_SLEEP);
5907 5923 bcopy(resp->array,
5908 5924 new_res, (i+1) * sizeof (nfs_resop4));
5909 5925 kmem_free(resp->array,
5910 5926 args->array_len * sizeof (nfs_resop4));
5911 5927
5912 5928 resp->array_len = i + 1;
5913 5929 resp->array = new_res;
5914 5930 }
5915 5931 }
5916 5932
5917 5933 rw_exit(&ne->exported_lock);
5918 5934
5919 5935 /*
5920 5936 * clear exportinfo and vnode fields from compound_state before dtrace
5921 5937 * probe, to avoid tracing residual values for path and share path.
5922 5938 */
5923 5939 if (cs.vp)
5924 5940 VN_RELE(cs.vp);
5925 5941 if (cs.saved_vp)
5926 5942 VN_RELE(cs.saved_vp);
5927 5943 cs.exi = cs.saved_exi = NULL;
5928 5944 cs.vp = cs.saved_vp = NULL;
5929 5945
5930 5946 DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
5931 5947 COMPOUND4res *, resp);
5932 5948
5933 5949 if (cs.saved_fh.nfs_fh4_val)
5934 5950 kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5935 5951
5936 5952 if (cs.basecr)
5937 5953 crfree(cs.basecr);
5938 5954 if (cs.cr)
5939 5955 crfree(cs.cr);
5940 5956 /*
5941 5957 * done with this compound request, free the label
5942 5958 */
5943 5959
5944 5960 if (req->rq_label != NULL) {
5945 5961 kmem_free(req->rq_label, sizeof (bslabel_t));
5946 5962 req->rq_label = NULL;
5947 5963 }
5948 5964 }
5949 5965
5950 5966 /*
5951 5967 * XXX because of what appears to be duplicate calls to rfs4_compound_free
5952 5968 * XXX zero out the tag and array values. Need to investigate why the
5953 5969 * XXX calls occur, but at least prevent the panic for now.
5954 5970 */
5955 5971 void
5956 5972 rfs4_compound_free(COMPOUND4res *resp)
5957 5973 {
5958 5974 uint_t i;
5959 5975
5960 5976 if (resp->tag.utf8string_val) {
5961 5977 UTF8STRING_FREE(resp->tag)
5962 5978 }
5963 5979
5964 5980 for (i = 0; i < resp->array_len; i++) {
5965 5981 nfs_resop4 *resop;
5966 5982 uint_t op;
5967 5983
5968 5984 resop = &resp->array[i];
5969 5985 op = (uint_t)resop->resop;
5970 5986 if (op < rfsv4disp_cnt) {
5971 5987 (*rfsv4disptab[op].dis_resfree)(resop);
5972 5988 }
5973 5989 }
5974 5990 if (resp->array != NULL) {
5975 5991 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
5976 5992 }
5977 5993 }
5978 5994
5979 5995 /*
5980 5996 * Process the value of the compound request rpc flags, as a bit-AND
5981 5997 * of the individual per-op flags (idempotent, allowork, publicfh_ok)
5982 5998 */
5983 5999 void
5984 6000 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
5985 6001 {
5986 6002 int i;
5987 6003 int flag = RPC_ALL;
5988 6004
5989 6005 for (i = 0; flag && i < args->array_len; i++) {
5990 6006 uint_t op;
5991 6007
5992 6008 op = (uint_t)args->array[i].argop;
5993 6009
5994 6010 if (op < rfsv4disp_cnt)
5995 6011 flag &= rfsv4disptab[op].dis_flags;
5996 6012 else
5997 6013 flag = 0;
5998 6014 }
5999 6015 *flagp = flag;
6000 6016 }
6001 6017
6002 6018 nfsstat4
6003 6019 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
6004 6020 {
6005 6021 nfsstat4 e;
6006 6022
6007 6023 rfs4_dbe_lock(cp->rc_dbe);
6008 6024
6009 6025 if (cp->rc_sysidt != LM_NOSYSID) {
6010 6026 *sp = cp->rc_sysidt;
6011 6027 e = NFS4_OK;
6012 6028
6013 6029 } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
6014 6030 *sp = cp->rc_sysidt;
6015 6031 e = NFS4_OK;
6016 6032
6017 6033 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6018 6034 "rfs4_client_sysid: allocated 0x%x\n", *sp));
6019 6035 } else
6020 6036 e = NFS4ERR_DELAY;
6021 6037
6022 6038 rfs4_dbe_unlock(cp->rc_dbe);
6023 6039 return (e);
6024 6040 }
6025 6041
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug helper: log a one-line description of a lock request.  str is a
 * caller supplied prefix, operation the fcntl-style command and flk the
 * lock being described.  A zero l_len is printed as all ones.
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
	char *opname = "F_UNKNOWN";
	char *typename = "F_UNKNOWN";
	longlong_t length;

	if (operation == F_GETLK)
		opname = "F_GETLK";
	else if (operation == F_SETLK)
		opname = "F_SETLK";
	else if (operation == F_SETLK_NBMAND)
		opname = "F_SETLK_NBMAND";

	if (flk->l_type == F_UNLCK)
		typename = "F_UNLCK";
	else if (flk->l_type == F_RDLCK)
		typename = "F_RDLCK";
	else if (flk->l_type == F_WRLCK)
		typename = "F_WRLCK";

	ASSERT(flk->l_whence == 0);

	length = (flk->l_len != 0) ? (longlong_t)flk->l_len : ~0LL;
	cmn_err(CE_NOTE, "%s: %s, type = %s, off = %llx len = %llx pid = %d",
	    str, opname, typename, (longlong_t)flk->l_start, length,
	    flk->l_pid);
}

/* Print the lock only when the debug condition d is true. */
#define	LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f)
#else
/* Non-DEBUG (or lint) builds: lock tracing compiles away. */
#define	LOCK_PRINT(d, s, t, f)
#endif
6062 6078
6063 6079 /*ARGSUSED*/
6064 6080 static bool_t
6065 6081 creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
6066 6082 {
6067 6083 return (TRUE);
6068 6084 }
6069 6085
6070 6086 /*
6071 6087 * Look up the pathname using the vp in cs as the directory vnode.
6072 6088 * cs->vp will be the vnode for the file on success
6073 6089 */
6074 6090
6075 6091 static nfsstat4
6076 6092 rfs4_lookup(component4 *component, struct svc_req *req,
6077 6093 struct compound_state *cs)
6078 6094 {
6079 6095 char *nm;
6080 6096 uint32_t len;
6081 6097 nfsstat4 status;
6082 6098 struct sockaddr *ca;
6083 6099 char *name;
6084 6100
6085 6101 if (cs->vp == NULL) {
6086 6102 return (NFS4ERR_NOFILEHANDLE);
6087 6103 }
6088 6104 if (cs->vp->v_type != VDIR) {
6089 6105 return (NFS4ERR_NOTDIR);
6090 6106 }
6091 6107
6092 6108 status = utf8_dir_verify(component);
6093 6109 if (status != NFS4_OK)
6094 6110 return (status);
6095 6111
6096 6112 nm = utf8_to_fn(component, &len, NULL);
6097 6113 if (nm == NULL) {
6098 6114 return (NFS4ERR_INVAL);
6099 6115 }
6100 6116
6101 6117 if (len > MAXNAMELEN) {
6102 6118 kmem_free(nm, len);
6103 6119 return (NFS4ERR_NAMETOOLONG);
6104 6120 }
6105 6121
6106 6122 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6107 6123 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6108 6124 MAXPATHLEN + 1);
6109 6125
6110 6126 if (name == NULL) {
6111 6127 kmem_free(nm, len);
6112 6128 return (NFS4ERR_INVAL);
6113 6129 }
6114 6130
6115 6131 status = do_rfs4_op_lookup(name, req, cs);
6116 6132
6117 6133 if (name != nm)
6118 6134 kmem_free(name, MAXPATHLEN + 1);
6119 6135
6120 6136 kmem_free(nm, len);
6121 6137
6122 6138 return (status);
6123 6139 }
6124 6140
6125 6141 static nfsstat4
6126 6142 rfs4_lookupfile(component4 *component, struct svc_req *req,
6127 6143 struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6128 6144 {
6129 6145 nfsstat4 status;
6130 6146 vnode_t *dvp = cs->vp;
6131 6147 vattr_t bva, ava, fva;
6132 6148 int error;
6133 6149
6134 6150 /* Get "before" change value */
6135 6151 bva.va_mask = AT_CTIME|AT_SEQ;
6136 6152 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6137 6153 if (error)
6138 6154 return (puterrno4(error));
6139 6155
6140 6156 /* rfs4_lookup may VN_RELE directory */
6141 6157 VN_HOLD(dvp);
6142 6158
6143 6159 status = rfs4_lookup(component, req, cs);
6144 6160 if (status != NFS4_OK) {
6145 6161 VN_RELE(dvp);
6146 6162 return (status);
6147 6163 }
6148 6164
6149 6165 /*
6150 6166 * Get "after" change value, if it fails, simply return the
6151 6167 * before value.
6152 6168 */
6153 6169 ava.va_mask = AT_CTIME|AT_SEQ;
6154 6170 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6155 6171 ava.va_ctime = bva.va_ctime;
6156 6172 ava.va_seq = 0;
6157 6173 }
6158 6174 VN_RELE(dvp);
6159 6175
6160 6176 /*
6161 6177 * Validate the file is a file
6162 6178 */
6163 6179 fva.va_mask = AT_TYPE|AT_MODE;
6164 6180 error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6165 6181 if (error)
6166 6182 return (puterrno4(error));
6167 6183
6168 6184 if (fva.va_type != VREG) {
6169 6185 if (fva.va_type == VDIR)
6170 6186 return (NFS4ERR_ISDIR);
6171 6187 if (fva.va_type == VLNK)
6172 6188 return (NFS4ERR_SYMLINK);
6173 6189 return (NFS4ERR_INVAL);
6174 6190 }
6175 6191
6176 6192 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6177 6193 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6178 6194
6179 6195 /*
6180 6196 * It is undefined if VOP_LOOKUP will change va_seq, so
6181 6197 * cinfo.atomic = TRUE only if we have
6182 6198 * non-zero va_seq's, and they have not changed.
6183 6199 */
6184 6200 if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6185 6201 cinfo->atomic = TRUE;
6186 6202 else
6187 6203 cinfo->atomic = FALSE;
6188 6204
6189 6205 /* Check for mandatory locking */
6190 6206 cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6191 6207 return (check_open_access(access, cs, req));
6192 6208 }
6193 6209
6194 6210 static nfsstat4
6195 6211 create_vnode(vnode_t *dvp, char *nm, vattr_t *vap, createmode4 mode,
6196 6212 cred_t *cr, vnode_t **vpp, bool_t *created)
6197 6213 {
6198 6214 int error;
6199 6215 nfsstat4 status = NFS4_OK;
6200 6216 vattr_t va;
6201 6217
6202 6218 tryagain:
6203 6219
6204 6220 /*
6205 6221 * The file open mode used is VWRITE. If the client needs
6206 6222 * some other semantic, then it should do the access checking
6207 6223 * itself. It would have been nice to have the file open mode
6208 6224 * passed as part of the arguments.
6209 6225 */
6210 6226
6211 6227 *created = TRUE;
6212 6228 error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6213 6229
6214 6230 if (error) {
6215 6231 *created = FALSE;
6216 6232
6217 6233 /*
6218 6234 * If we got something other than file already exists
6219 6235 * then just return this error. Otherwise, we got
6220 6236 * EEXIST. If we were doing a GUARDED create, then
6221 6237 * just return this error. Otherwise, we need to
6222 6238 * make sure that this wasn't a duplicate of an
6223 6239 * exclusive create request.
6224 6240 *
6225 6241 * The assumption is made that a non-exclusive create
6226 6242 * request will never return EEXIST.
6227 6243 */
6228 6244
6229 6245 if (error != EEXIST || mode == GUARDED4) {
6230 6246 status = puterrno4(error);
6231 6247 return (status);
6232 6248 }
6233 6249 error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6234 6250 NULL, NULL, NULL);
6235 6251
6236 6252 if (error) {
6237 6253 /*
6238 6254 * We couldn't find the file that we thought that
6239 6255 * we just created. So, we'll just try creating
6240 6256 * it again.
6241 6257 */
6242 6258 if (error == ENOENT)
6243 6259 goto tryagain;
6244 6260
6245 6261 status = puterrno4(error);
6246 6262 return (status);
6247 6263 }
6248 6264
6249 6265 if (mode == UNCHECKED4) {
6250 6266 /* existing object must be regular file */
6251 6267 if ((*vpp)->v_type != VREG) {
6252 6268 if ((*vpp)->v_type == VDIR)
6253 6269 status = NFS4ERR_ISDIR;
6254 6270 else if ((*vpp)->v_type == VLNK)
6255 6271 status = NFS4ERR_SYMLINK;
6256 6272 else
6257 6273 status = NFS4ERR_INVAL;
6258 6274 VN_RELE(*vpp);
6259 6275 return (status);
6260 6276 }
6261 6277
6262 6278 return (NFS4_OK);
6263 6279 }
6264 6280
6265 6281 /* Check for duplicate request */
6266 6282 va.va_mask = AT_MTIME;
6267 6283 error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6268 6284 if (!error) {
6269 6285 /* We found the file */
6270 6286 const timestruc_t *mtime = &vap->va_mtime;
6271 6287
6272 6288 if (va.va_mtime.tv_sec != mtime->tv_sec ||
6273 6289 va.va_mtime.tv_nsec != mtime->tv_nsec) {
6274 6290 /* but its not our creation */
6275 6291 VN_RELE(*vpp);
6276 6292 return (NFS4ERR_EXIST);
6277 6293 }
6278 6294 *created = TRUE; /* retrans of create == created */
6279 6295 return (NFS4_OK);
6280 6296 }
6281 6297 VN_RELE(*vpp);
6282 6298 return (NFS4ERR_EXIST);
6283 6299 }
6284 6300
6285 6301 return (NFS4_OK);
6286 6302 }
6287 6303
6288 6304 static nfsstat4
6289 6305 check_open_access(uint32_t access, struct compound_state *cs,
6290 6306 struct svc_req *req)
6291 6307 {
6292 6308 int error;
6293 6309 vnode_t *vp;
6294 6310 bool_t readonly;
6295 6311 cred_t *cr = cs->cr;
6296 6312
6297 6313 /* For now we don't allow mandatory locking as per V2/V3 */
6298 6314 if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6299 6315 return (NFS4ERR_ACCESS);
6300 6316 }
6301 6317
6302 6318 vp = cs->vp;
6303 6319 ASSERT(cr != NULL && vp->v_type == VREG);
6304 6320
6305 6321 /*
6306 6322 * If the file system is exported read only and we are trying
6307 6323 * to open for write, then return NFS4ERR_ROFS
6308 6324 */
6309 6325
6310 6326 readonly = rdonly4(req, cs);
6311 6327
6312 6328 if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6313 6329 return (NFS4ERR_ROFS);
6314 6330
6315 6331 if (access & OPEN4_SHARE_ACCESS_READ) {
6316 6332 if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6317 6333 (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6318 6334 return (NFS4ERR_ACCESS);
6319 6335 }
6320 6336 }
6321 6337
6322 6338 if (access & OPEN4_SHARE_ACCESS_WRITE) {
6323 6339 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6324 6340 if (error)
6325 6341 return (NFS4ERR_ACCESS);
6326 6342 }
6327 6343
6328 6344 return (NFS4_OK);
6329 6345 }
6330 6346
6331 6347 static nfsstat4
6332 6348 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6333 6349 change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6334 6350 {
6335 6351 struct nfs4_svgetit_arg sarg;
6336 6352 struct nfs4_ntov_table ntov;
6337 6353
6338 6354 bool_t ntov_table_init = FALSE;
6339 6355 struct statvfs64 sb;
6340 6356 nfsstat4 status;
6341 6357 vnode_t *vp;
6342 6358 vattr_t bva, ava, iva, cva, *vap;
6343 6359 vnode_t *dvp;
6344 6360 timespec32_t *mtime;
6345 6361 char *nm = NULL;
6346 6362 uint_t buflen;
6347 6363 bool_t created;
6348 6364 bool_t setsize = FALSE;
6349 6365 len_t reqsize;
6350 6366 int error;
6351 6367 bool_t trunc;
6352 6368 caller_context_t ct;
6353 6369 component4 *component;
6354 6370 bslabel_t *clabel;
6355 6371 struct sockaddr *ca;
6356 6372 char *name = NULL;
6357 6373
6358 6374 sarg.sbp = &sb;
6359 6375 sarg.is_referral = B_FALSE;
6360 6376
6361 6377 dvp = cs->vp;
6362 6378
6363 6379 /* Check if the file system is read only */
6364 6380 if (rdonly4(req, cs))
6365 6381 return (NFS4ERR_ROFS);
6366 6382
6367 6383 /* check the label of including directory */
6368 6384 if (is_system_labeled()) {
6369 6385 ASSERT(req->rq_label != NULL);
6370 6386 clabel = req->rq_label;
6371 6387 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6372 6388 "got client label from request(1)",
6373 6389 struct svc_req *, req);
6374 6390 if (!blequal(&l_admin_low->tsl_label, clabel)) {
6375 6391 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6376 6392 cs->exi)) {
6377 6393 return (NFS4ERR_ACCESS);
6378 6394 }
6379 6395 }
6380 6396 }
6381 6397
6382 6398 /*
6383 6399 * Get the last component of path name in nm. cs will reference
6384 6400 * the including directory on success.
6385 6401 */
6386 6402 component = &args->open_claim4_u.file;
6387 6403 status = utf8_dir_verify(component);
6388 6404 if (status != NFS4_OK)
6389 6405 return (status);
6390 6406
6391 6407 nm = utf8_to_fn(component, &buflen, NULL);
6392 6408
6393 6409 if (nm == NULL)
6394 6410 return (NFS4ERR_RESOURCE);
6395 6411
6396 6412 if (buflen > MAXNAMELEN) {
6397 6413 kmem_free(nm, buflen);
6398 6414 return (NFS4ERR_NAMETOOLONG);
6399 6415 }
6400 6416
6401 6417 bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6402 6418 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6403 6419 if (error) {
6404 6420 kmem_free(nm, buflen);
6405 6421 return (puterrno4(error));
6406 6422 }
6407 6423
6408 6424 if (bva.va_type != VDIR) {
6409 6425 kmem_free(nm, buflen);
6410 6426 return (NFS4ERR_NOTDIR);
6411 6427 }
6412 6428
6413 6429 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6414 6430
6415 6431 switch (args->mode) {
6416 6432 case GUARDED4:
6417 6433 /*FALLTHROUGH*/
6418 6434 case UNCHECKED4:
6419 6435 nfs4_ntov_table_init(&ntov);
6420 6436 ntov_table_init = TRUE;
6421 6437
6422 6438 *attrset = 0;
6423 6439 status = do_rfs4_set_attrs(attrset,
6424 6440 &args->createhow4_u.createattrs,
6425 6441 cs, &sarg, &ntov, NFS4ATTR_SETIT);
6426 6442
6427 6443 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6428 6444 sarg.vap->va_type != VREG) {
6429 6445 if (sarg.vap->va_type == VDIR)
6430 6446 status = NFS4ERR_ISDIR;
6431 6447 else if (sarg.vap->va_type == VLNK)
6432 6448 status = NFS4ERR_SYMLINK;
6433 6449 else
6434 6450 status = NFS4ERR_INVAL;
6435 6451 }
6436 6452
6437 6453 if (status != NFS4_OK) {
6438 6454 kmem_free(nm, buflen);
6439 6455 nfs4_ntov_table_free(&ntov, &sarg);
6440 6456 *attrset = 0;
6441 6457 return (status);
6442 6458 }
6443 6459
6444 6460 vap = sarg.vap;
6445 6461 vap->va_type = VREG;
6446 6462 vap->va_mask |= AT_TYPE;
6447 6463
6448 6464 if ((vap->va_mask & AT_MODE) == 0) {
6449 6465 vap->va_mask |= AT_MODE;
6450 6466 vap->va_mode = (mode_t)0600;
6451 6467 }
6452 6468
6453 6469 if (vap->va_mask & AT_SIZE) {
6454 6470
6455 6471 /* Disallow create with a non-zero size */
6456 6472
6457 6473 if ((reqsize = sarg.vap->va_size) != 0) {
6458 6474 kmem_free(nm, buflen);
6459 6475 nfs4_ntov_table_free(&ntov, &sarg);
6460 6476 *attrset = 0;
6461 6477 return (NFS4ERR_INVAL);
6462 6478 }
6463 6479 setsize = TRUE;
6464 6480 }
6465 6481 break;
6466 6482
6467 6483 case EXCLUSIVE4:
6468 6484 /* prohibit EXCL create of named attributes */
6469 6485 if (dvp->v_flag & V_XATTRDIR) {
6470 6486 kmem_free(nm, buflen);
6471 6487 *attrset = 0;
6472 6488 return (NFS4ERR_INVAL);
6473 6489 }
6474 6490
6475 6491 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6476 6492 cva.va_type = VREG;
6477 6493 /*
6478 6494 * Ensure no time overflows. Assumes underlying
6479 6495 * filesystem supports at least 32 bits.
6480 6496 * Truncate nsec to usec resolution to allow valid
6481 6497 * compares even if the underlying filesystem truncates.
6482 6498 */
6483 6499 mtime = (timespec32_t *)&args->createhow4_u.createverf;
6484 6500 cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6485 6501 cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6486 6502 cva.va_mode = (mode_t)0;
6487 6503 vap = &cva;
6488 6504
6489 6505 /*
6490 6506 * For EXCL create, attrset is set to the server attr
6491 6507 * used to cache the client's verifier.
6492 6508 */
6493 6509 *attrset = FATTR4_TIME_MODIFY_MASK;
6494 6510 break;
6495 6511 }
6496 6512
6497 6513 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6498 6514 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6499 6515 MAXPATHLEN + 1);
6500 6516
6501 6517 if (name == NULL) {
6502 6518 kmem_free(nm, buflen);
6503 6519 return (NFS4ERR_SERVERFAULT);
6504 6520 }
6505 6521
6506 6522 status = create_vnode(dvp, name, vap, args->mode,
6507 6523 cs->cr, &vp, &created);
6508 6524 if (nm != name)
6509 6525 kmem_free(name, MAXPATHLEN + 1);
6510 6526 kmem_free(nm, buflen);
6511 6527
6512 6528 if (status != NFS4_OK) {
6513 6529 if (ntov_table_init)
6514 6530 nfs4_ntov_table_free(&ntov, &sarg);
6515 6531 *attrset = 0;
6516 6532 return (status);
6517 6533 }
6518 6534
6519 6535 trunc = (setsize && !created);
6520 6536
6521 6537 if (args->mode != EXCLUSIVE4) {
6522 6538 bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6523 6539
6524 6540 /*
6525 6541 * True verification that object was created with correct
6526 6542 * attrs is impossible. The attrs could have been changed
6527 6543 * immediately after object creation. If attributes did
6528 6544 * not verify, the only recourse for the server is to
6529 6545 * destroy the object. Maybe if some attrs (like gid)
6530 6546 * are set incorrectly, the object should be destroyed;
6531 6547 * however, seems bad as a default policy. Do we really
6532 6548 * want to destroy an object over one of the times not
6533 6549 * verifying correctly? For these reasons, the server
6534 6550 * currently sets bits in attrset for createattrs
6535 6551 * that were set; however, no verification is done.
6536 6552 *
6537 6553 * vmask_to_nmask accounts for vattr bits set on create
6538 6554 * [do_rfs4_set_attrs() only sets resp bits for
6539 6555 * non-vattr/vfs bits.]
6540 6556 * Mask off any bits we set by default so as not to return
6541 6557 * more attrset bits than were requested in createattrs
6542 6558 */
6543 6559 if (created) {
6544 6560 nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6545 6561 *attrset &= createmask;
6546 6562 } else {
6547 6563 /*
6548 6564 * We did not create the vnode (we tried but it
6549 6565 * already existed). In this case, the only createattr
6550 6566 * that the spec allows the server to set is size,
6551 6567 * and even then, it can only be set if it is 0.
6552 6568 */
6553 6569 *attrset = 0;
6554 6570 if (trunc)
6555 6571 *attrset = FATTR4_SIZE_MASK;
6556 6572 }
6557 6573 }
6558 6574 if (ntov_table_init)
6559 6575 nfs4_ntov_table_free(&ntov, &sarg);
6560 6576
6561 6577 /*
6562 6578 * Get the initial "after" sequence number, if it fails,
6563 6579 * set to zero, time to before.
6564 6580 */
6565 6581 iva.va_mask = AT_CTIME|AT_SEQ;
6566 6582 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6567 6583 iva.va_seq = 0;
6568 6584 iva.va_ctime = bva.va_ctime;
6569 6585 }
6570 6586
6571 6587 /*
6572 6588 * create_vnode attempts to create the file exclusive,
6573 6589 * if it already exists the VOP_CREATE will fail and
6574 6590 * may not increase va_seq. It is atomic if
6575 6591 * we haven't changed the directory, but if it has changed
6576 6592 * we don't know what changed it.
6577 6593 */
6578 6594 if (!created) {
6579 6595 if (bva.va_seq && iva.va_seq &&
6580 6596 bva.va_seq == iva.va_seq)
6581 6597 cinfo->atomic = TRUE;
6582 6598 else
6583 6599 cinfo->atomic = FALSE;
6584 6600 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6585 6601 } else {
6586 6602 /*
6587 6603 * The entry was created, we need to sync the
6588 6604 * directory metadata.
6589 6605 */
6590 6606 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6591 6607
6592 6608 /*
6593 6609 * Get "after" change value, if it fails, simply return the
6594 6610 * before value.
6595 6611 */
6596 6612 ava.va_mask = AT_CTIME|AT_SEQ;
6597 6613 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6598 6614 ava.va_ctime = bva.va_ctime;
6599 6615 ava.va_seq = 0;
6600 6616 }
6601 6617
6602 6618 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6603 6619
6604 6620 /*
6605 6621 * The cinfo->atomic = TRUE only if we have
6606 6622 * non-zero va_seq's, and it has incremented by exactly one
6607 6623 * during the create_vnode and it didn't
6608 6624 * change during the VOP_FSYNC.
6609 6625 */
6610 6626 if (bva.va_seq && iva.va_seq && ava.va_seq &&
6611 6627 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6612 6628 cinfo->atomic = TRUE;
6613 6629 else
6614 6630 cinfo->atomic = FALSE;
6615 6631 }
6616 6632
6617 6633 /* Check for mandatory locking and that the size gets set. */
6618 6634 cva.va_mask = AT_MODE;
6619 6635 if (setsize)
6620 6636 cva.va_mask |= AT_SIZE;
6621 6637
6622 6638 /* Assume the worst */
6623 6639 cs->mandlock = TRUE;
6624 6640
6625 6641 if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6626 6642 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6627 6643
6628 6644 /*
6629 6645 * Truncate the file if necessary; this would be
6630 6646 * the case for create over an existing file.
6631 6647 */
6632 6648
6633 6649 if (trunc) {
6634 6650 int in_crit = 0;
6635 6651 rfs4_file_t *fp;
6636 6652 nfs4_srv_t *nsrv4;
6637 6653 bool_t create = FALSE;
6638 6654
6639 6655 /*
6640 6656 * We are writing over an existing file.
6641 6657 * Check to see if we need to recall a delegation.
6642 6658 */
6643 6659 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
6644 6660 rfs4_hold_deleg_policy(nsrv4);
6645 6661 if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6646 6662 if (rfs4_check_delegated_byfp(FWRITE, fp,
6647 6663 (reqsize == 0), FALSE, FALSE, &clientid)) {
6648 6664 rfs4_file_rele(fp);
6649 6665 rfs4_rele_deleg_policy(nsrv4);
6650 6666 VN_RELE(vp);
6651 6667 *attrset = 0;
6652 6668 return (NFS4ERR_DELAY);
6653 6669 }
6654 6670 rfs4_file_rele(fp);
6655 6671 }
6656 6672 rfs4_rele_deleg_policy(nsrv4);
6657 6673
6658 6674 if (nbl_need_check(vp)) {
6659 6675 in_crit = 1;
6660 6676
6661 6677 ASSERT(reqsize == 0);
6662 6678
6663 6679 nbl_start_crit(vp, RW_READER);
6664 6680 if (nbl_conflict(vp, NBL_WRITE, 0,
6665 6681 cva.va_size, 0, NULL)) {
6666 6682 in_crit = 0;
6667 6683 nbl_end_crit(vp);
6668 6684 VN_RELE(vp);
6669 6685 *attrset = 0;
6670 6686 return (NFS4ERR_ACCESS);
6671 6687 }
6672 6688 }
6673 6689 ct.cc_sysid = 0;
6674 6690 ct.cc_pid = 0;
6675 6691 ct.cc_caller_id = nfs4_srv_caller_id;
6676 6692 ct.cc_flags = CC_DONTBLOCK;
6677 6693
6678 6694 cva.va_mask = AT_SIZE;
6679 6695 cva.va_size = reqsize;
6680 6696 (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6681 6697 if (in_crit)
6682 6698 nbl_end_crit(vp);
6683 6699 }
6684 6700 }
6685 6701
6686 6702 error = makefh4(&cs->fh, vp, cs->exi);
6687 6703
6688 6704 /*
6689 6705 * Force modified data and metadata out to stable storage.
6690 6706 */
6691 6707 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6692 6708
6693 6709 if (error) {
6694 6710 VN_RELE(vp);
6695 6711 *attrset = 0;
6696 6712 return (puterrno4(error));
6697 6713 }
6698 6714
6699 6715 /* if parent dir is attrdir, set namedattr fh flag */
6700 6716 if (dvp->v_flag & V_XATTRDIR)
6701 6717 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6702 6718
6703 6719 if (cs->vp)
6704 6720 VN_RELE(cs->vp);
6705 6721
6706 6722 cs->vp = vp;
6707 6723
6708 6724 /*
6709 6725 * if we did not create the file, we will need to check
6710 6726 * the access bits on the file
6711 6727 */
6712 6728
6713 6729 if (!created) {
6714 6730 if (setsize)
6715 6731 args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6716 6732 status = check_open_access(args->share_access, cs, req);
6717 6733 if (status != NFS4_OK)
6718 6734 *attrset = 0;
6719 6735 }
6720 6736 return (status);
6721 6737 }
6722 6738
6723 6739 /*ARGSUSED*/
6724 6740 static void
6725 6741 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
6726 6742 rfs4_openowner_t *oo, delegreq_t deleg,
6727 6743 uint32_t access, uint32_t deny,
6728 6744 OPEN4res *resp, int deleg_cur)
6729 6745 {
6730 6746 /* XXX Currently not using req */
6731 6747 rfs4_state_t *sp;
6732 6748 rfs4_file_t *fp;
6733 6749 bool_t screate = TRUE;
6734 6750 bool_t fcreate = TRUE;
6735 6751 uint32_t open_a, share_a;
6736 6752 uint32_t open_d, share_d;
6737 6753 rfs4_deleg_state_t *dsp;
6738 6754 sysid_t sysid;
6739 6755 nfsstat4 status;
6740 6756 caller_context_t ct;
6741 6757 int fflags = 0;
6742 6758 int recall = 0;
6743 6759 int err;
6744 6760 int first_open;
6745 6761
6746 6762 /* get the file struct and hold a lock on it during initial open */
6747 6763 fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
6748 6764 if (fp == NULL) {
6749 6765 resp->status = NFS4ERR_RESOURCE;
6750 6766 DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
6751 6767 return;
6752 6768 }
6753 6769
6754 6770 sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
6755 6771 if (sp == NULL) {
6756 6772 resp->status = NFS4ERR_RESOURCE;
6757 6773 DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
6758 6774 /* No need to keep any reference */
6759 6775 rw_exit(&fp->rf_file_rwlock);
6760 6776 rfs4_file_rele(fp);
6761 6777 return;
6762 6778 }
6763 6779
6764 6780 /* try to get the sysid before continuing */
6765 6781 if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
6766 6782 resp->status = status;
6767 6783 rfs4_file_rele(fp);
6768 6784 /* Not a fully formed open; "close" it */
6769 6785 if (screate == TRUE)
6770 6786 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6771 6787 rfs4_state_rele(sp);
6772 6788 return;
6773 6789 }
6774 6790
6775 6791 /* Calculate the fflags for this OPEN. */
6776 6792 if (access & OPEN4_SHARE_ACCESS_READ)
6777 6793 fflags |= FREAD;
6778 6794 if (access & OPEN4_SHARE_ACCESS_WRITE)
6779 6795 fflags |= FWRITE;
6780 6796
6781 6797 rfs4_dbe_lock(sp->rs_dbe);
6782 6798
6783 6799 /*
6784 6800 * Calculate the new deny and access mode that this open is adding to
6785 6801 * the file for this open owner;
6786 6802 */
6787 6803 open_d = (deny & ~sp->rs_open_deny);
6788 6804 open_a = (access & ~sp->rs_open_access);
6789 6805
6790 6806 /*
6791 6807 * Calculate the new share access and share deny modes that this open
6792 6808 * is adding to the file for this open owner;
6793 6809 */
6794 6810 share_a = (access & ~sp->rs_share_access);
6795 6811 share_d = (deny & ~sp->rs_share_deny);
6796 6812
6797 6813 first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
6798 6814
6799 6815 /*
6800 6816 * Check to see the client has already sent an open for this
6801 6817 * open owner on this file with the same share/deny modes.
6802 6818 * If so, we don't need to check for a conflict and we don't
6803 6819 * need to add another shrlock. If not, then we need to
6804 6820 * check for conflicts in deny and access before checking for
6805 6821 * conflicts in delegation. We don't want to recall a
6806 6822 * delegation based on an open that will eventually fail based
6807 6823 * on shares modes.
6808 6824 */
6809 6825
6810 6826 if (share_a || share_d) {
6811 6827 if ((err = rfs4_share(sp, access, deny)) != 0) {
6812 6828 rfs4_dbe_unlock(sp->rs_dbe);
6813 6829 resp->status = err;
6814 6830
6815 6831 rfs4_file_rele(fp);
6816 6832 /* Not a fully formed open; "close" it */
6817 6833 if (screate == TRUE)
6818 6834 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6819 6835 rfs4_state_rele(sp);
6820 6836 return;
6821 6837 }
6822 6838 }
6823 6839
6824 6840 rfs4_dbe_lock(fp->rf_dbe);
6825 6841
6826 6842 /*
6827 6843 * Check to see if this file is delegated and if so, if a
6828 6844 * recall needs to be done.
6829 6845 */
6830 6846 if (rfs4_check_recall(sp, access)) {
6831 6847 rfs4_dbe_unlock(fp->rf_dbe);
6832 6848 rfs4_dbe_unlock(sp->rs_dbe);
6833 6849 rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
6834 6850 delay(NFS4_DELEGATION_CONFLICT_DELAY);
6835 6851 rfs4_dbe_lock(sp->rs_dbe);
6836 6852
6837 6853 /* if state closed while lock was dropped */
6838 6854 if (sp->rs_closed) {
6839 6855 if (share_a || share_d)
6840 6856 (void) rfs4_unshare(sp);
6841 6857 rfs4_dbe_unlock(sp->rs_dbe);
6842 6858 rfs4_file_rele(fp);
6843 6859 /* Not a fully formed open; "close" it */
6844 6860 if (screate == TRUE)
6845 6861 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6846 6862 rfs4_state_rele(sp);
6847 6863 resp->status = NFS4ERR_OLD_STATEID;
6848 6864 return;
6849 6865 }
6850 6866
6851 6867 rfs4_dbe_lock(fp->rf_dbe);
6852 6868 /* Let's see if the delegation was returned */
6853 6869 if (rfs4_check_recall(sp, access)) {
6854 6870 rfs4_dbe_unlock(fp->rf_dbe);
6855 6871 if (share_a || share_d)
6856 6872 (void) rfs4_unshare(sp);
6857 6873 rfs4_dbe_unlock(sp->rs_dbe);
6858 6874 rfs4_file_rele(fp);
6859 6875 rfs4_update_lease(sp->rs_owner->ro_client);
6860 6876
6861 6877 /* Not a fully formed open; "close" it */
6862 6878 if (screate == TRUE)
6863 6879 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6864 6880 rfs4_state_rele(sp);
6865 6881 resp->status = NFS4ERR_DELAY;
6866 6882 return;
6867 6883 }
6868 6884 }
6869 6885 /*
6870 6886 * the share check passed and any delegation conflict has been
6871 6887 * taken care of, now call vop_open.
6872 6888 * if this is the first open then call vop_open with fflags.
6873 6889 * if not, call vn_open_upgrade with just the upgrade flags.
6874 6890 *
6875 6891 * if the file has been opened already, it will have the current
6876 6892 * access mode in the state struct. if it has no share access, then
6877 6893 * this is a new open.
6878 6894 *
6879 6895 * However, if this is open with CLAIM_DLEGATE_CUR, then don't
6880 6896 * call VOP_OPEN(), just do the open upgrade.
6881 6897 */
6882 6898 if (first_open && !deleg_cur) {
6883 6899 ct.cc_sysid = sysid;
6884 6900 ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
6885 6901 ct.cc_caller_id = nfs4_srv_caller_id;
6886 6902 ct.cc_flags = CC_DONTBLOCK;
6887 6903 err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
6888 6904 if (err) {
6889 6905 rfs4_dbe_unlock(fp->rf_dbe);
6890 6906 if (share_a || share_d)
6891 6907 (void) rfs4_unshare(sp);
6892 6908 rfs4_dbe_unlock(sp->rs_dbe);
6893 6909 rfs4_file_rele(fp);
6894 6910
6895 6911 /* Not a fully formed open; "close" it */
6896 6912 if (screate == TRUE)
6897 6913 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6898 6914 rfs4_state_rele(sp);
6899 6915 /* check if a monitor detected a delegation conflict */
6900 6916 if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
6901 6917 resp->status = NFS4ERR_DELAY;
6902 6918 else
6903 6919 resp->status = NFS4ERR_SERVERFAULT;
6904 6920 return;
6905 6921 }
6906 6922 } else { /* open upgrade */
6907 6923 /*
6908 6924 * calculate the fflags for the new mode that is being added
6909 6925 * by this upgrade.
6910 6926 */
6911 6927 fflags = 0;
6912 6928 if (open_a & OPEN4_SHARE_ACCESS_READ)
6913 6929 fflags |= FREAD;
6914 6930 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6915 6931 fflags |= FWRITE;
6916 6932 vn_open_upgrade(cs->vp, fflags);
6917 6933 }
6918 6934 sp->rs_open_access |= access;
6919 6935 sp->rs_open_deny |= deny;
6920 6936
6921 6937 if (open_d & OPEN4_SHARE_DENY_READ)
6922 6938 fp->rf_deny_read++;
6923 6939 if (open_d & OPEN4_SHARE_DENY_WRITE)
6924 6940 fp->rf_deny_write++;
6925 6941 fp->rf_share_deny |= deny;
6926 6942
6927 6943 if (open_a & OPEN4_SHARE_ACCESS_READ)
6928 6944 fp->rf_access_read++;
6929 6945 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6930 6946 fp->rf_access_write++;
6931 6947 fp->rf_share_access |= access;
6932 6948
6933 6949 /*
6934 6950 * Check for delegation here. if the deleg argument is not
6935 6951 * DELEG_ANY, then this is a reclaim from a client and
6936 6952 * we must honor the delegation requested. If necessary we can
6937 6953 * set the recall flag.
6938 6954 */
6939 6955
6940 6956 dsp = rfs4_grant_delegation(deleg, sp, &recall);
6941 6957
6942 6958 cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
6943 6959
6944 6960 next_stateid(&sp->rs_stateid);
6945 6961
6946 6962 resp->stateid = sp->rs_stateid.stateid;
6947 6963
6948 6964 rfs4_dbe_unlock(fp->rf_dbe);
6949 6965 rfs4_dbe_unlock(sp->rs_dbe);
6950 6966
6951 6967 if (dsp) {
6952 6968 rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
6953 6969 rfs4_deleg_state_rele(dsp);
6954 6970 }
6955 6971
6956 6972 rfs4_file_rele(fp);
6957 6973 rfs4_state_rele(sp);
6958 6974
6959 6975 resp->status = NFS4_OK;
6960 6976 }
6961 6977
6962 6978 /*ARGSUSED*/
6963 6979 static void
6964 6980 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6965 6981 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6966 6982 {
6967 6983 change_info4 *cinfo = &resp->cinfo;
6968 6984 bitmap4 *attrset = &resp->attrset;
6969 6985
6970 6986 if (args->opentype == OPEN4_NOCREATE)
6971 6987 resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
6972 6988 req, cs, args->share_access, cinfo);
6973 6989 else {
6974 6990 /* inhibit delegation grants during exclusive create */
6975 6991
6976 6992 if (args->mode == EXCLUSIVE4)
6977 6993 rfs4_disable_delegation();
6978 6994
6979 6995 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
6980 6996 oo->ro_client->rc_clientid);
6981 6997 }
6982 6998
6983 6999 if (resp->status == NFS4_OK) {
6984 7000
6985 7001 /* cs->vp cs->fh now reference the desired file */
6986 7002
6987 7003 rfs4_do_open(cs, req, oo,
6988 7004 oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
6989 7005 args->share_access, args->share_deny, resp, 0);
6990 7006
6991 7007 /*
6992 7008 * If rfs4_createfile set attrset, we must
6993 7009 * clear this attrset before the response is copied.
6994 7010 */
6995 7011 if (resp->status != NFS4_OK && resp->attrset) {
6996 7012 resp->attrset = 0;
6997 7013 }
6998 7014 }
6999 7015 else
7000 7016 *cs->statusp = resp->status;
7001 7017
7002 7018 if (args->mode == EXCLUSIVE4)
7003 7019 rfs4_enable_delegation();
7004 7020 }
7005 7021
7006 7022 /*ARGSUSED*/
7007 7023 static void
7008 7024 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
7009 7025 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7010 7026 {
7011 7027 change_info4 *cinfo = &resp->cinfo;
7012 7028 vattr_t va;
7013 7029 vtype_t v_type = cs->vp->v_type;
7014 7030 int error = 0;
7015 7031
7016 7032 /* Verify that we have a regular file */
7017 7033 if (v_type != VREG) {
7018 7034 if (v_type == VDIR)
7019 7035 resp->status = NFS4ERR_ISDIR;
7020 7036 else if (v_type == VLNK)
7021 7037 resp->status = NFS4ERR_SYMLINK;
7022 7038 else
7023 7039 resp->status = NFS4ERR_INVAL;
7024 7040 return;
7025 7041 }
7026 7042
7027 7043 va.va_mask = AT_MODE|AT_UID;
7028 7044 error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
7029 7045 if (error) {
7030 7046 resp->status = puterrno4(error);
7031 7047 return;
7032 7048 }
7033 7049
7034 7050 cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7035 7051
7036 7052 /*
7037 7053 * Check if we have access to the file, Note the the file
7038 7054 * could have originally been open UNCHECKED or GUARDED
7039 7055 * with mode bits that will now fail, but there is nothing
7040 7056 * we can really do about that except in the case that the
7041 7057 * owner of the file is the one requesting the open.
7042 7058 */
7043 7059 if (crgetuid(cs->cr) != va.va_uid) {
7044 7060 resp->status = check_open_access(args->share_access, cs, req);
7045 7061 if (resp->status != NFS4_OK) {
7046 7062 return;
7047 7063 }
7048 7064 }
7049 7065
7050 7066 /*
7051 7067 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7052 7068 */
7053 7069 cinfo->before = 0;
7054 7070 cinfo->after = 0;
7055 7071 cinfo->atomic = FALSE;
7056 7072
7057 7073 rfs4_do_open(cs, req, oo,
7058 7074 NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
7059 7075 args->share_access, args->share_deny, resp, 0);
7060 7076 }
7061 7077
7062 7078 static void
7063 7079 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7064 7080 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7065 7081 {
7066 7082 int error;
7067 7083 nfsstat4 status;
7068 7084 stateid4 stateid =
7069 7085 args->open_claim4_u.delegate_cur_info.delegate_stateid;
7070 7086 rfs4_deleg_state_t *dsp;
7071 7087
7072 7088 /*
7073 7089 * Find the state info from the stateid and confirm that the
7074 7090 * file is delegated. If the state openowner is the same as
7075 7091 * the supplied openowner we're done. If not, get the file
7076 7092 * info from the found state info. Use that file info to
7077 7093 * create the state for this lock owner. Note solaris doen't
7078 7094 * really need the pathname to find the file. We may want to
7079 7095 * lookup the pathname and make sure that the vp exist and
7080 7096 * matches the vp in the file structure. However it is
7081 7097 * possible that the pathname nolonger exists (local process
7082 7098 * unlinks the file), so this may not be that useful.
7083 7099 */
7084 7100
7085 7101 status = rfs4_get_deleg_state(&stateid, &dsp);
7086 7102 if (status != NFS4_OK) {
7087 7103 resp->status = status;
7088 7104 return;
7089 7105 }
7090 7106
7091 7107 ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7092 7108
7093 7109 /*
7094 7110 * New lock owner, create state. Since this was probably called
7095 7111 * in response to a CB_RECALL we set deleg to DELEG_NONE
7096 7112 */
7097 7113
7098 7114 ASSERT(cs->vp != NULL);
7099 7115 VN_RELE(cs->vp);
7100 7116 VN_HOLD(dsp->rds_finfo->rf_vp);
7101 7117 cs->vp = dsp->rds_finfo->rf_vp;
7102 7118
7103 7119 if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
7104 7120 rfs4_deleg_state_rele(dsp);
7105 7121 *cs->statusp = resp->status = puterrno4(error);
7106 7122 return;
7107 7123 }
7108 7124
7109 7125 /* Mark progress for delegation returns */
7110 7126 dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7111 7127 rfs4_deleg_state_rele(dsp);
7112 7128 rfs4_do_open(cs, req, oo, DELEG_NONE,
7113 7129 args->share_access, args->share_deny, resp, 1);
7114 7130 }
7115 7131
7116 7132 /*ARGSUSED*/
7117 7133 static void
7118 7134 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7119 7135 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7120 7136 {
7121 7137 /*
7122 7138 * Lookup the pathname, it must already exist since this file
7123 7139 * was delegated.
7124 7140 *
7125 7141 * Find the file and state info for this vp and open owner pair.
7126 7142 * check that they are in fact delegated.
7127 7143 * check that the state access and deny modes are the same.
7128 7144 *
7129 7145 * Return the delgation possibly seting the recall flag.
7130 7146 */
7131 7147 rfs4_file_t *fp;
7132 7148 rfs4_state_t *sp;
7133 7149 bool_t create = FALSE;
7134 7150 bool_t dcreate = FALSE;
7135 7151 rfs4_deleg_state_t *dsp;
7136 7152 nfsace4 *ace;
7137 7153
7138 7154 /* Note we ignore oflags */
7139 7155 resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
7140 7156 req, cs, args->share_access, &resp->cinfo);
7141 7157
7142 7158 if (resp->status != NFS4_OK) {
7143 7159 return;
7144 7160 }
7145 7161
7146 7162 /* get the file struct and hold a lock on it during initial open */
7147 7163 fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7148 7164 if (fp == NULL) {
7149 7165 resp->status = NFS4ERR_RESOURCE;
7150 7166 DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7151 7167 return;
7152 7168 }
7153 7169
7154 7170 sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7155 7171 if (sp == NULL) {
7156 7172 resp->status = NFS4ERR_SERVERFAULT;
7157 7173 DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7158 7174 rw_exit(&fp->rf_file_rwlock);
7159 7175 rfs4_file_rele(fp);
7160 7176 return;
7161 7177 }
7162 7178
7163 7179 rfs4_dbe_lock(sp->rs_dbe);
7164 7180 rfs4_dbe_lock(fp->rf_dbe);
7165 7181 if (args->share_access != sp->rs_share_access ||
7166 7182 args->share_deny != sp->rs_share_deny ||
7167 7183 sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7168 7184 NFS4_DEBUG(rfs4_debug,
7169 7185 (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7170 7186 rfs4_dbe_unlock(fp->rf_dbe);
7171 7187 rfs4_dbe_unlock(sp->rs_dbe);
7172 7188 rfs4_file_rele(fp);
7173 7189 rfs4_state_rele(sp);
7174 7190 resp->status = NFS4ERR_SERVERFAULT;
7175 7191 return;
7176 7192 }
7177 7193 rfs4_dbe_unlock(fp->rf_dbe);
7178 7194 rfs4_dbe_unlock(sp->rs_dbe);
7179 7195
7180 7196 dsp = rfs4_finddeleg(sp, &dcreate);
7181 7197 if (dsp == NULL) {
7182 7198 rfs4_state_rele(sp);
7183 7199 rfs4_file_rele(fp);
7184 7200 resp->status = NFS4ERR_SERVERFAULT;
7185 7201 return;
7186 7202 }
7187 7203
7188 7204 next_stateid(&sp->rs_stateid);
7189 7205
7190 7206 resp->stateid = sp->rs_stateid.stateid;
7191 7207
7192 7208 resp->delegation.delegation_type = dsp->rds_dtype;
7193 7209
7194 7210 if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7195 7211 open_read_delegation4 *rv =
7196 7212 &resp->delegation.open_delegation4_u.read;
7197 7213
7198 7214 rv->stateid = dsp->rds_delegid.stateid;
7199 7215 rv->recall = FALSE; /* no policy in place to set to TRUE */
7200 7216 ace = &rv->permissions;
7201 7217 } else {
7202 7218 open_write_delegation4 *rv =
7203 7219 &resp->delegation.open_delegation4_u.write;
7204 7220
7205 7221 rv->stateid = dsp->rds_delegid.stateid;
7206 7222 rv->recall = FALSE; /* no policy in place to set to TRUE */
7207 7223 ace = &rv->permissions;
7208 7224 rv->space_limit.limitby = NFS_LIMIT_SIZE;
7209 7225 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7210 7226 }
7211 7227
7212 7228 /* XXX For now */
7213 7229 ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7214 7230 ace->flag = 0;
7215 7231 ace->access_mask = 0;
7216 7232 ace->who.utf8string_len = 0;
7217 7233 ace->who.utf8string_val = 0;
7218 7234
7219 7235 rfs4_deleg_state_rele(dsp);
7220 7236 rfs4_state_rele(sp);
7221 7237 rfs4_file_rele(fp);
7222 7238 }
7223 7239
/*
 * Result of comparing a request's sequence id with the one on record.
 */
typedef enum {
	/* the request carries the next expected sequence id */
	NFS4_CHKSEQ_OKAY = 0,
	/* same sequence id and same operation as the last request */
	NFS4_CHKSEQ_REPLAY = 1,
	/* any other sequence id */
	NFS4_CHKSEQ_BAD = 2
} rfs4_chkseq_t;
7229 7245
7230 7246 /*
7231 7247 * Generic function for sequence number checks.
7232 7248 */
7233 7249 static rfs4_chkseq_t
7234 7250 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7235 7251 seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7236 7252 {
7237 7253 /* Same sequence ids and matching operations? */
7238 7254 if (seqid == rqst_seq && resop->resop == lastop->resop) {
7239 7255 if (copyres == TRUE) {
7240 7256 rfs4_free_reply(resop);
7241 7257 rfs4_copy_reply(resop, lastop);
7242 7258 }
7243 7259 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7244 7260 "Replayed SEQID %d\n", seqid));
7245 7261 return (NFS4_CHKSEQ_REPLAY);
7246 7262 }
7247 7263
7248 7264 /* If the incoming sequence is not the next expected then it is bad */
7249 7265 if (rqst_seq != seqid + 1) {
7250 7266 if (rqst_seq == seqid) {
7251 7267 NFS4_DEBUG(rfs4_debug,
7252 7268 (CE_NOTE, "BAD SEQID: Replayed sequence id "
7253 7269 "but last op was %d current op is %d\n",
7254 7270 lastop->resop, resop->resop));
7255 7271 return (NFS4_CHKSEQ_BAD);
7256 7272 }
7257 7273 NFS4_DEBUG(rfs4_debug,
7258 7274 (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7259 7275 rqst_seq, seqid));
7260 7276 return (NFS4_CHKSEQ_BAD);
7261 7277 }
7262 7278
7263 7279 /* Everything okay -- next expected */
7264 7280 return (NFS4_CHKSEQ_OKAY);
7265 7281 }
7266 7282
7267 7283
7268 7284 static rfs4_chkseq_t
7269 7285 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7270 7286 {
7271 7287 rfs4_chkseq_t rc;
7272 7288
7273 7289 rfs4_dbe_lock(op->ro_dbe);
7274 7290 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7275 7291 TRUE);
7276 7292 rfs4_dbe_unlock(op->ro_dbe);
7277 7293
7278 7294 if (rc == NFS4_CHKSEQ_OKAY)
7279 7295 rfs4_update_lease(op->ro_client);
7280 7296
7281 7297 return (rc);
7282 7298 }
7283 7299
7284 7300 static rfs4_chkseq_t
7285 7301 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7286 7302 {
7287 7303 rfs4_chkseq_t rc;
7288 7304
7289 7305 rfs4_dbe_lock(op->ro_dbe);
7290 7306 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7291 7307 olo_seqid, resop, FALSE);
7292 7308 rfs4_dbe_unlock(op->ro_dbe);
7293 7309
7294 7310 return (rc);
7295 7311 }
7296 7312
7297 7313 static rfs4_chkseq_t
7298 7314 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7299 7315 {
7300 7316 rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7301 7317
7302 7318 rfs4_dbe_lock(lsp->rls_dbe);
7303 7319 if (!lsp->rls_skip_seqid_check)
7304 7320 rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7305 7321 resop, TRUE);
7306 7322 rfs4_dbe_unlock(lsp->rls_dbe);
7307 7323
7308 7324 return (rc);
7309 7325 }
7310 7326
7311 7327 static void
7312 7328 rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
7313 7329 struct svc_req *req, struct compound_state *cs)
7314 7330 {
7315 7331 OPEN4args *args = &argop->nfs_argop4_u.opopen;
7316 7332 OPEN4res *resp = &resop->nfs_resop4_u.opopen;
7317 7333 open_owner4 *owner = &args->owner;
7318 7334 open_claim_type4 claim = args->claim;
7319 7335 rfs4_client_t *cp;
7320 7336 rfs4_openowner_t *oo;
7321 7337 bool_t create;
7322 7338 bool_t replay = FALSE;
7323 7339 int can_reclaim;
7324 7340
7325 7341 DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
7326 7342 OPEN4args *, args);
7327 7343
7328 7344 if (cs->vp == NULL) {
7329 7345 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7330 7346 goto end;
7331 7347 }
7332 7348
7333 7349 /*
7334 7350 * Need to check clientid and lease expiration first based on
7335 7351 * error ordering and incrementing sequence id.
7336 7352 */
7337 7353 cp = rfs4_findclient_by_id(owner->clientid, FALSE);
7338 7354 if (cp == NULL) {
7339 7355 *cs->statusp = resp->status =
7340 7356 rfs4_check_clientid(&owner->clientid, 0);
7341 7357 goto end;
7342 7358 }
7343 7359
7344 7360 if (rfs4_lease_expired(cp)) {
7345 7361 rfs4_client_close(cp);
7346 7362 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7347 7363 goto end;
7348 7364 }
7349 7365 can_reclaim = cp->rc_can_reclaim;
7350 7366
7351 7367 /*
7352 7368 * Find the open_owner for use from this point forward. Take
7353 7369 * care in updating the sequence id based on the type of error
7354 7370 * being returned.
7355 7371 */
7356 7372 retry:
7357 7373 create = TRUE;
7358 7374 oo = rfs4_findopenowner(owner, &create, args->seqid);
7359 7375 if (oo == NULL) {
7360 7376 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
7361 7377 rfs4_client_rele(cp);
7362 7378 goto end;
7363 7379 }
7364 7380
7365 7381 /* Hold off access to the sequence space while the open is done */
7366 7382 rfs4_sw_enter(&oo->ro_sw);
7367 7383
7368 7384 /*
7369 7385 * If the open_owner existed before at the server, then check
7370 7386 * the sequence id.
7371 7387 */
7372 7388 if (!create && !oo->ro_postpone_confirm) {
7373 7389 switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
7374 7390 case NFS4_CHKSEQ_BAD:
7375 7391 if ((args->seqid > oo->ro_open_seqid) &&
7376 7392 oo->ro_need_confirm) {
7377 7393 rfs4_free_opens(oo, TRUE, FALSE);
7378 7394 rfs4_sw_exit(&oo->ro_sw);
7379 7395 rfs4_openowner_rele(oo);
7380 7396 goto retry;
7381 7397 }
7382 7398 resp->status = NFS4ERR_BAD_SEQID;
7383 7399 goto out;
7384 7400 case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
7385 7401 replay = TRUE;
7386 7402 goto out;
7387 7403 default:
7388 7404 break;
7389 7405 }
7390 7406
7391 7407 /*
7392 7408 * Sequence was ok and open owner exists
7393 7409 * check to see if we have yet to see an
7394 7410 * open_confirm.
7395 7411 */
7396 7412 if (oo->ro_need_confirm) {
7397 7413 rfs4_free_opens(oo, TRUE, FALSE);
7398 7414 rfs4_sw_exit(&oo->ro_sw);
7399 7415 rfs4_openowner_rele(oo);
7400 7416 goto retry;
7401 7417 }
7402 7418 }
7403 7419 /* Grace only applies to regular-type OPENs */
7404 7420 if (rfs4_clnt_in_grace(cp) &&
7405 7421 (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
7406 7422 *cs->statusp = resp->status = NFS4ERR_GRACE;
7407 7423 goto out;
7408 7424 }
7409 7425
7410 7426 /*
7411 7427 * If previous state at the server existed then can_reclaim
7412 7428 * will be set. If not reply NFS4ERR_NO_GRACE to the
7413 7429 * client.
7414 7430 */
7415 7431 if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
7416 7432 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7417 7433 goto out;
7418 7434 }
7419 7435
7420 7436
7421 7437 /*
7422 7438 * Reject the open if the client has missed the grace period
7423 7439 */
7424 7440 if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
7425 7441 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7426 7442 goto out;
7427 7443 }
7428 7444
7429 7445 /* Couple of up-front bookkeeping items */
7430 7446 if (oo->ro_need_confirm) {
7431 7447 /*
7432 7448 * If this is a reclaim OPEN then we should not ask
7433 7449 * for a confirmation of the open_owner per the
7434 7450 * protocol specification.
7435 7451 */
7436 7452 if (claim == CLAIM_PREVIOUS)
7437 7453 oo->ro_need_confirm = FALSE;
7438 7454 else
7439 7455 resp->rflags |= OPEN4_RESULT_CONFIRM;
7440 7456 }
7441 7457 resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;
7442 7458
7443 7459 /*
7444 7460 * If there is an unshared filesystem mounted on this vnode,
7445 7461 * do not allow to open/create in this directory.
7446 7462 */
7447 7463 if (vn_ismntpt(cs->vp)) {
7448 7464 *cs->statusp = resp->status = NFS4ERR_ACCESS;
7449 7465 goto out;
7450 7466 }
7451 7467
7452 7468 /*
7453 7469 * access must READ, WRITE, or BOTH. No access is invalid.
7454 7470 * deny can be READ, WRITE, BOTH, or NONE.
7455 7471 * bits not defined for access/deny are invalid.
7456 7472 */
7457 7473 if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
7458 7474 (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
7459 7475 (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
7460 7476 *cs->statusp = resp->status = NFS4ERR_INVAL;
7461 7477 goto out;
7462 7478 }
7463 7479
7464 7480
7465 7481 /*
7466 7482 * make sure attrset is zero before response is built.
7467 7483 */
7468 7484 resp->attrset = 0;
7469 7485
7470 7486 switch (claim) {
7471 7487 case CLAIM_NULL:
7472 7488 rfs4_do_opennull(cs, req, args, oo, resp);
7473 7489 break;
7474 7490 case CLAIM_PREVIOUS:
7475 7491 rfs4_do_openprev(cs, req, args, oo, resp);
7476 7492 break;
7477 7493 case CLAIM_DELEGATE_CUR:
7478 7494 rfs4_do_opendelcur(cs, req, args, oo, resp);
7479 7495 break;
7480 7496 case CLAIM_DELEGATE_PREV:
7481 7497 rfs4_do_opendelprev(cs, req, args, oo, resp);
7482 7498 break;
7483 7499 default:
7484 7500 resp->status = NFS4ERR_INVAL;
7485 7501 break;
7486 7502 }
7487 7503
7488 7504 out:
7489 7505 rfs4_client_rele(cp);
7490 7506
7491 7507 /* Catch sequence id handling here to make it a little easier */
7492 7508 switch (resp->status) {
7493 7509 case NFS4ERR_BADXDR:
7494 7510 case NFS4ERR_BAD_SEQID:
7495 7511 case NFS4ERR_BAD_STATEID:
7496 7512 case NFS4ERR_NOFILEHANDLE:
7497 7513 case NFS4ERR_RESOURCE:
7498 7514 case NFS4ERR_STALE_CLIENTID:
7499 7515 case NFS4ERR_STALE_STATEID:
7500 7516 /*
7501 7517 * The protocol states that if any of these errors are
7502 7518 * being returned, the sequence id should not be
7503 7519 * incremented. Any other return requires an
7504 7520 * increment.
7505 7521 */
7506 7522 break;
7507 7523 default:
7508 7524 /* Always update the lease in this case */
7509 7525 rfs4_update_lease(oo->ro_client);
7510 7526
7511 7527 /* Regular response - copy the result */
7512 7528 if (!replay)
7513 7529 rfs4_update_open_resp(oo, resop, &cs->fh);
7514 7530
7515 7531 /*
7516 7532 * REPLAY case: Only if the previous response was OK
7517 7533 * do we copy the filehandle. If not OK, no
7518 7534 * filehandle to copy.
7519 7535 */
7520 7536 if (replay == TRUE &&
7521 7537 resp->status == NFS4_OK &&
7522 7538 oo->ro_reply_fh.nfs_fh4_val) {
7523 7539 /*
7524 7540 * If this is a replay, we must restore the
7525 7541 * current filehandle/vp to that of what was
7526 7542 * returned originally. Try our best to do
7527 7543 * it.
7528 7544 */
7529 7545 nfs_fh4_fmt_t *fh_fmtp =
7530 7546 (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;
7531 7547
7532 7548 cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
7533 7549 (fid_t *)&fh_fmtp->fh4_xlen, NULL);
7534 7550
7535 7551 if (cs->exi == NULL) {
7536 7552 resp->status = NFS4ERR_STALE;
7537 7553 goto finish;
7538 7554 }
7539 7555
7540 7556 VN_RELE(cs->vp);
7541 7557
7542 7558 cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
7543 7559 &resp->status);
7544 7560
7545 7561 if (cs->vp == NULL)
7546 7562 goto finish;
7547 7563
7548 7564 nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
7549 7565 }
7550 7566
7551 7567 /*
7552 7568 * If this was a replay, no need to update the
7553 7569 * sequence id. If the open_owner was not created on
7554 7570 * this pass, then update. The first use of an
7555 7571 * open_owner will not bump the sequence id.
7556 7572 */
7557 7573 if (replay == FALSE && !create)
7558 7574 rfs4_update_open_sequence(oo);
7559 7575 /*
7560 7576 * If the client is receiving an error and the
7561 7577 * open_owner needs to be confirmed, there is no way
7562 7578 * to notify the client of this fact ignoring the fact
7563 7579 * that the server has no method of returning a
7564 7580 * stateid to confirm. Therefore, the server needs to
7565 7581 * mark this open_owner in a way as to avoid the
7566 7582 * sequence id checking the next time the client uses
7567 7583 * this open_owner.
7568 7584 */
7569 7585 if (resp->status != NFS4_OK && oo->ro_need_confirm)
7570 7586 oo->ro_postpone_confirm = TRUE;
7571 7587 /*
7572 7588 * If OK response then clear the postpone flag and
7573 7589 * reset the sequence id to keep in sync with the
7574 7590 * client.
7575 7591 */
7576 7592 if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
7577 7593 oo->ro_postpone_confirm = FALSE;
7578 7594 oo->ro_open_seqid = args->seqid;
7579 7595 }
7580 7596 break;
7581 7597 }
7582 7598
7583 7599 finish:
7584 7600 *cs->statusp = resp->status;
7585 7601
7586 7602 rfs4_sw_exit(&oo->ro_sw);
7587 7603 rfs4_openowner_rele(oo);
7588 7604
7589 7605 end:
7590 7606 DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
7591 7607 OPEN4res *, resp);
7592 7608 }
7593 7609
7594 7610 /*ARGSUSED*/
7595 7611 void
7596 7612 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7597 7613 struct svc_req *req, struct compound_state *cs)
7598 7614 {
7599 7615 OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7600 7616 OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7601 7617 rfs4_state_t *sp;
7602 7618 nfsstat4 status;
7603 7619
7604 7620 DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7605 7621 OPEN_CONFIRM4args *, args);
7606 7622
7607 7623 if (cs->vp == NULL) {
7608 7624 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7609 7625 goto out;
7610 7626 }
7611 7627
7612 7628 if (cs->vp->v_type != VREG) {
7613 7629 *cs->statusp = resp->status =
7614 7630 cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7615 7631 return;
7616 7632 }
7617 7633
7618 7634 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7619 7635 if (status != NFS4_OK) {
7620 7636 *cs->statusp = resp->status = status;
7621 7637 goto out;
7622 7638 }
7623 7639
7624 7640 /* Ensure specified filehandle matches */
7625 7641 if (cs->vp != sp->rs_finfo->rf_vp) {
7626 7642 rfs4_state_rele(sp);
7627 7643 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7628 7644 goto out;
7629 7645 }
7630 7646
7631 7647 /* hold off other access to open_owner while we tinker */
7632 7648 rfs4_sw_enter(&sp->rs_owner->ro_sw);
7633 7649
7634 7650 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7635 7651 case NFS4_CHECK_STATEID_OKAY:
7636 7652 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7637 7653 resop) != 0) {
7638 7654 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7639 7655 break;
7640 7656 }
7641 7657 /*
7642 7658 * If it is the appropriate stateid and determined to
7643 7659 * be "OKAY" then this means that the stateid does not
7644 7660 * need to be confirmed and the client is in error for
7645 7661 * sending an OPEN_CONFIRM.
7646 7662 */
7647 7663 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7648 7664 break;
7649 7665 case NFS4_CHECK_STATEID_OLD:
7650 7666 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7651 7667 break;
7652 7668 case NFS4_CHECK_STATEID_BAD:
7653 7669 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7654 7670 break;
7655 7671 case NFS4_CHECK_STATEID_EXPIRED:
7656 7672 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7657 7673 break;
7658 7674 case NFS4_CHECK_STATEID_CLOSED:
7659 7675 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7660 7676 break;
7661 7677 case NFS4_CHECK_STATEID_REPLAY:
7662 7678 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7663 7679 resop)) {
7664 7680 case NFS4_CHKSEQ_OKAY:
7665 7681 /*
7666 7682 * This is replayed stateid; if seqid matches
7667 7683 * next expected, then client is using wrong seqid.
7668 7684 */
7669 7685 /* fall through */
7670 7686 case NFS4_CHKSEQ_BAD:
7671 7687 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7672 7688 break;
7673 7689 case NFS4_CHKSEQ_REPLAY:
7674 7690 /*
7675 7691 * Note this case is the duplicate case so
7676 7692 * resp->status is already set.
7677 7693 */
7678 7694 *cs->statusp = resp->status;
7679 7695 rfs4_update_lease(sp->rs_owner->ro_client);
7680 7696 break;
7681 7697 }
7682 7698 break;
7683 7699 case NFS4_CHECK_STATEID_UNCONFIRMED:
7684 7700 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7685 7701 resop) != NFS4_CHKSEQ_OKAY) {
7686 7702 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7687 7703 break;
7688 7704 }
7689 7705 *cs->statusp = resp->status = NFS4_OK;
7690 7706
7691 7707 next_stateid(&sp->rs_stateid);
7692 7708 resp->open_stateid = sp->rs_stateid.stateid;
7693 7709 sp->rs_owner->ro_need_confirm = FALSE;
7694 7710 rfs4_update_lease(sp->rs_owner->ro_client);
7695 7711 rfs4_update_open_sequence(sp->rs_owner);
7696 7712 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7697 7713 break;
7698 7714 default:
7699 7715 ASSERT(FALSE);
7700 7716 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7701 7717 break;
7702 7718 }
7703 7719 rfs4_sw_exit(&sp->rs_owner->ro_sw);
7704 7720 rfs4_state_rele(sp);
7705 7721
7706 7722 out:
7707 7723 DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7708 7724 OPEN_CONFIRM4res *, resp);
7709 7725 }
7710 7726
7711 7727 /*ARGSUSED*/
7712 7728 void
7713 7729 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7714 7730 struct svc_req *req, struct compound_state *cs)
7715 7731 {
7716 7732 OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7717 7733 OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7718 7734 uint32_t access = args->share_access;
7719 7735 uint32_t deny = args->share_deny;
7720 7736 nfsstat4 status;
7721 7737 rfs4_state_t *sp;
7722 7738 rfs4_file_t *fp;
7723 7739 int fflags = 0;
7724 7740
7725 7741 DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7726 7742 OPEN_DOWNGRADE4args *, args);
7727 7743
7728 7744 if (cs->vp == NULL) {
7729 7745 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7730 7746 goto out;
7731 7747 }
7732 7748
7733 7749 if (cs->vp->v_type != VREG) {
7734 7750 *cs->statusp = resp->status = NFS4ERR_INVAL;
7735 7751 return;
7736 7752 }
7737 7753
7738 7754 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7739 7755 if (status != NFS4_OK) {
7740 7756 *cs->statusp = resp->status = status;
7741 7757 goto out;
7742 7758 }
7743 7759
7744 7760 /* Ensure specified filehandle matches */
7745 7761 if (cs->vp != sp->rs_finfo->rf_vp) {
7746 7762 rfs4_state_rele(sp);
7747 7763 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7748 7764 goto out;
7749 7765 }
7750 7766
7751 7767 /* hold off other access to open_owner while we tinker */
7752 7768 rfs4_sw_enter(&sp->rs_owner->ro_sw);
7753 7769
7754 7770 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7755 7771 case NFS4_CHECK_STATEID_OKAY:
7756 7772 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7757 7773 resop) != NFS4_CHKSEQ_OKAY) {
7758 7774 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7759 7775 goto end;
7760 7776 }
7761 7777 break;
7762 7778 case NFS4_CHECK_STATEID_OLD:
7763 7779 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7764 7780 goto end;
7765 7781 case NFS4_CHECK_STATEID_BAD:
7766 7782 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7767 7783 goto end;
7768 7784 case NFS4_CHECK_STATEID_EXPIRED:
7769 7785 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7770 7786 goto end;
7771 7787 case NFS4_CHECK_STATEID_CLOSED:
7772 7788 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7773 7789 goto end;
7774 7790 case NFS4_CHECK_STATEID_UNCONFIRMED:
7775 7791 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7776 7792 goto end;
7777 7793 case NFS4_CHECK_STATEID_REPLAY:
7778 7794 /* Check the sequence id for the open owner */
7779 7795 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7780 7796 resop)) {
7781 7797 case NFS4_CHKSEQ_OKAY:
7782 7798 /*
7783 7799 * This is replayed stateid; if seqid matches
7784 7800 * next expected, then client is using wrong seqid.
7785 7801 */
7786 7802 /* fall through */
7787 7803 case NFS4_CHKSEQ_BAD:
7788 7804 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7789 7805 goto end;
7790 7806 case NFS4_CHKSEQ_REPLAY:
7791 7807 /*
7792 7808 * Note this case is the duplicate case so
7793 7809 * resp->status is already set.
7794 7810 */
7795 7811 *cs->statusp = resp->status;
7796 7812 rfs4_update_lease(sp->rs_owner->ro_client);
7797 7813 goto end;
7798 7814 }
7799 7815 break;
7800 7816 default:
7801 7817 ASSERT(FALSE);
7802 7818 break;
7803 7819 }
7804 7820
7805 7821 rfs4_dbe_lock(sp->rs_dbe);
7806 7822 /*
7807 7823 * Check that the new access modes and deny modes are valid.
7808 7824 * Check that no invalid bits are set.
7809 7825 */
7810 7826 if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
7811 7827 (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
7812 7828 *cs->statusp = resp->status = NFS4ERR_INVAL;
7813 7829 rfs4_update_open_sequence(sp->rs_owner);
7814 7830 rfs4_dbe_unlock(sp->rs_dbe);
7815 7831 goto end;
7816 7832 }
7817 7833
7818 7834 /*
7819 7835 * The new modes must be a subset of the current modes and
7820 7836 * the access must specify at least one mode. To test that
7821 7837 * the new mode is a subset of the current modes we bitwise
7822 7838 * AND them together and check that the result equals the new
7823 7839 * mode. For example:
7824 7840 * New mode, access == R and current mode, sp->rs_open_access == RW
7825 7841 * access & sp->rs_open_access == R == access, so the new access mode
7826 7842 * is valid. Consider access == RW, sp->rs_open_access = R
7827 7843 * access & sp->rs_open_access == R != access, so the new access mode
7828 7844 * is invalid.
7829 7845 */
7830 7846 if ((access & sp->rs_open_access) != access ||
7831 7847 (deny & sp->rs_open_deny) != deny ||
7832 7848 (access &
7833 7849 (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
7834 7850 *cs->statusp = resp->status = NFS4ERR_INVAL;
7835 7851 rfs4_update_open_sequence(sp->rs_owner);
7836 7852 rfs4_dbe_unlock(sp->rs_dbe);
7837 7853 goto end;
7838 7854 }
7839 7855
7840 7856 /*
7841 7857 * Release any share locks associated with this stateID.
7842 7858 * Strictly speaking, this violates the spec because the
7843 7859 * spec effectively requires that open downgrade be atomic.
7844 7860 * At present, fs_shrlock does not have this capability.
7845 7861 */
7846 7862 (void) rfs4_unshare(sp);
7847 7863
7848 7864 status = rfs4_share(sp, access, deny);
7849 7865 if (status != NFS4_OK) {
7850 7866 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7851 7867 rfs4_update_open_sequence(sp->rs_owner);
7852 7868 rfs4_dbe_unlock(sp->rs_dbe);
7853 7869 goto end;
7854 7870 }
7855 7871
7856 7872 fp = sp->rs_finfo;
7857 7873 rfs4_dbe_lock(fp->rf_dbe);
7858 7874
7859 7875 /*
7860 7876 * If the current mode has deny read and the new mode
7861 7877 * does not, decrement the number of deny read mode bits
7862 7878 * and if it goes to zero turn off the deny read bit
7863 7879 * on the file.
7864 7880 */
7865 7881 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
7866 7882 (deny & OPEN4_SHARE_DENY_READ) == 0) {
7867 7883 fp->rf_deny_read--;
7868 7884 if (fp->rf_deny_read == 0)
7869 7885 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
7870 7886 }
7871 7887
7872 7888 /*
7873 7889 * If the current mode has deny write and the new mode
7874 7890 * does not, decrement the number of deny write mode bits
7875 7891 * and if it goes to zero turn off the deny write bit
7876 7892 * on the file.
7877 7893 */
7878 7894 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
7879 7895 (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
7880 7896 fp->rf_deny_write--;
7881 7897 if (fp->rf_deny_write == 0)
7882 7898 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7883 7899 }
7884 7900
7885 7901 /*
7886 7902 * If the current mode has access read and the new mode
7887 7903 * does not, decrement the number of access read mode bits
7888 7904 * and if it goes to zero turn off the access read bit
7889 7905 * on the file. set fflags to FREAD for the call to
7890 7906 * vn_open_downgrade().
7891 7907 */
7892 7908 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
7893 7909 (access & OPEN4_SHARE_ACCESS_READ) == 0) {
7894 7910 fp->rf_access_read--;
7895 7911 if (fp->rf_access_read == 0)
7896 7912 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
7897 7913 fflags |= FREAD;
7898 7914 }
7899 7915
7900 7916 /*
7901 7917 * If the current mode has access write and the new mode
7902 7918 * does not, decrement the number of access write mode bits
7903 7919 * and if it goes to zero turn off the access write bit
7904 7920 * on the file. set fflags to FWRITE for the call to
7905 7921 * vn_open_downgrade().
7906 7922 */
7907 7923 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
7908 7924 (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7909 7925 fp->rf_access_write--;
7910 7926 if (fp->rf_access_write == 0)
7911 7927 fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
7912 7928 fflags |= FWRITE;
7913 7929 }
7914 7930
7915 7931 /* Check that the file is still accessible */
7916 7932 ASSERT(fp->rf_share_access);
7917 7933
7918 7934 rfs4_dbe_unlock(fp->rf_dbe);
7919 7935
7920 7936 /* now set the new open access and deny modes */
7921 7937 sp->rs_open_access = access;
7922 7938 sp->rs_open_deny = deny;
7923 7939
7924 7940 /*
7925 7941 * we successfully downgraded the share lock, now we need to downgrade
7926 7942 * the open. it is possible that the downgrade was only for a deny
7927 7943 * mode and we have nothing else to do.
7928 7944 */
7929 7945 if ((fflags & (FREAD|FWRITE)) != 0)
7930 7946 vn_open_downgrade(cs->vp, fflags);
7931 7947
7932 7948 /* Update the stateid */
7933 7949 next_stateid(&sp->rs_stateid);
7934 7950 resp->open_stateid = sp->rs_stateid.stateid;
7935 7951
7936 7952 rfs4_dbe_unlock(sp->rs_dbe);
7937 7953
7938 7954 *cs->statusp = resp->status = NFS4_OK;
7939 7955 /* Update the lease */
7940 7956 rfs4_update_lease(sp->rs_owner->ro_client);
7941 7957 /* And the sequence */
7942 7958 rfs4_update_open_sequence(sp->rs_owner);
7943 7959 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7944 7960
7945 7961 end:
7946 7962 rfs4_sw_exit(&sp->rs_owner->ro_sw);
7947 7963 rfs4_state_rele(sp);
7948 7964 out:
7949 7965 DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
7950 7966 OPEN_DOWNGRADE4res *, resp);
7951 7967 }
7952 7968
/*
 * Search the first n bytes at s1 for the NUL-terminated string s2.
 * s1 is treated as a raw byte buffer and need not be NUL-terminated.
 * Returns a pointer to the first match, or NULL when s2 does not occur
 * within those n bytes.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
	char *cur = (char *)s1;
	size_t patlen = strlen(s2);
	size_t left;

	/* stop once fewer than patlen bytes remain to be compared */
	for (left = n; left >= patlen; left--, cur++) {
		if (bcmp(cur, s2, patlen) == 0)
			return (cur);
	}

	return (NULL);
}
7968 7984
7969 7985 /*
7970 7986 * The logic behind this function is detailed in the NFSv4 RFC in the
7971 7987 * SETCLIENTID operation description under IMPLEMENTATION. Refer to
7972 7988 * that section for explicit guidance to server behavior for
7973 7989 * SETCLIENTID.
7974 7990 */
7975 7991 void
7976 7992 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
7977 7993 struct svc_req *req, struct compound_state *cs)
7978 7994 {
7979 7995 SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
7980 7996 SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
7981 7997 rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
7982 7998 rfs4_clntip_t *ci;
7983 7999 bool_t create;
7984 8000 char *addr, *netid;
7985 8001 int len;
7986 8002
7987 8003 DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
7988 8004 SETCLIENTID4args *, args);
7989 8005 retry:
7990 8006 newcp = cp_confirmed = cp_unconfirmed = NULL;
7991 8007
7992 8008 /*
7993 8009 * Save the caller's IP address
7994 8010 */
7995 8011 args->client.cl_addr =
7996 8012 (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
7997 8013
7998 8014 /*
7999 8015 * Record if it is a Solaris client that cannot handle referrals.
8000 8016 */
8001 8017 if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
8002 8018 !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
8003 8019 /* Add a "yes, it's downrev" record */
8004 8020 create = TRUE;
8005 8021 ci = rfs4_find_clntip(args->client.cl_addr, &create);
8006 8022 ASSERT(ci != NULL);
8007 8023 rfs4_dbe_rele(ci->ri_dbe);
8008 8024 } else {
8009 8025 /* Remove any previous record */
8010 8026 rfs4_invalidate_clntip(args->client.cl_addr);
8011 8027 }
8012 8028
8013 8029 /*
8014 8030 * In search of an EXISTING client matching the incoming
8015 8031 * request to establish a new client identifier at the server
8016 8032 */
8017 8033 create = TRUE;
8018 8034 cp = rfs4_findclient(&args->client, &create, NULL);
8019 8035
8020 8036 /* Should never happen */
8021 8037 ASSERT(cp != NULL);
8022 8038
8023 8039 if (cp == NULL) {
8024 8040 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8025 8041 goto out;
8026 8042 }
8027 8043
8028 8044 /*
8029 8045 * Easiest case. Client identifier is newly created and is
8030 8046 * unconfirmed. Also note that for this case, no other
8031 8047 * entries exist for the client identifier. Nothing else to
8032 8048 * check. Just setup the response and respond.
8033 8049 */
8034 8050 if (create) {
8035 8051 *cs->statusp = res->status = NFS4_OK;
8036 8052 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
8037 8053 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8038 8054 cp->rc_confirm_verf;
8039 8055 /* Setup callback information; CB_NULL confirmation later */
8040 8056 rfs4_client_setcb(cp, &args->callback, args->callback_ident);
8041 8057
8042 8058 rfs4_client_rele(cp);
8043 8059 goto out;
8044 8060 }
8045 8061
8046 8062 /*
8047 8063 * An existing, confirmed client may exist but it may not have
8048 8064 * been active for at least one lease period. If so, then
8049 8065 * "close" the client and create a new client identifier
8050 8066 */
8051 8067 if (rfs4_lease_expired(cp)) {
8052 8068 rfs4_client_close(cp);
8053 8069 goto retry;
8054 8070 }
8055 8071
8056 8072 if (cp->rc_need_confirm == TRUE)
8057 8073 cp_unconfirmed = cp;
8058 8074 else
8059 8075 cp_confirmed = cp;
8060 8076
8061 8077 cp = NULL;
8062 8078
8063 8079 /*
8064 8080 * We have a confirmed client, now check for an
8065 8081 * unconfimred entry
8066 8082 */
8067 8083 if (cp_confirmed) {
8068 8084 /* If creds don't match then client identifier is inuse */
8069 8085 if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
8070 8086 rfs4_cbinfo_t *cbp;
8071 8087 /*
8072 8088 * Some one else has established this client
8073 8089 * id. Try and say * who they are. We will use
8074 8090 * the call back address supplied by * the
8075 8091 * first client.
8076 8092 */
8077 8093 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8078 8094
8079 8095 addr = netid = NULL;
8080 8096
8081 8097 cbp = &cp_confirmed->rc_cbinfo;
8082 8098 if (cbp->cb_callback.cb_location.r_addr &&
8083 8099 cbp->cb_callback.cb_location.r_netid) {
8084 8100 cb_client4 *cbcp = &cbp->cb_callback;
8085 8101
8086 8102 len = strlen(cbcp->cb_location.r_addr)+1;
8087 8103 addr = kmem_alloc(len, KM_SLEEP);
8088 8104 bcopy(cbcp->cb_location.r_addr, addr, len);
8089 8105 len = strlen(cbcp->cb_location.r_netid)+1;
8090 8106 netid = kmem_alloc(len, KM_SLEEP);
8091 8107 bcopy(cbcp->cb_location.r_netid, netid, len);
8092 8108 }
8093 8109
8094 8110 res->SETCLIENTID4res_u.client_using.r_addr = addr;
8095 8111 res->SETCLIENTID4res_u.client_using.r_netid = netid;
8096 8112
8097 8113 rfs4_client_rele(cp_confirmed);
8098 8114 }
8099 8115
8100 8116 /*
8101 8117 * Confirmed, creds match, and verifier matches; must
8102 8118 * be an update of the callback info
8103 8119 */
8104 8120 if (cp_confirmed->rc_nfs_client.verifier ==
8105 8121 args->client.verifier) {
8106 8122 /* Setup callback information */
8107 8123 rfs4_client_setcb(cp_confirmed, &args->callback,
8108 8124 args->callback_ident);
8109 8125
8110 8126 /* everything okay -- move ahead */
8111 8127 *cs->statusp = res->status = NFS4_OK;
8112 8128 res->SETCLIENTID4res_u.resok4.clientid =
8113 8129 cp_confirmed->rc_clientid;
8114 8130
8115 8131 /* update the confirm_verifier and return it */
8116 8132 rfs4_client_scv_next(cp_confirmed);
8117 8133 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8118 8134 cp_confirmed->rc_confirm_verf;
8119 8135
8120 8136 rfs4_client_rele(cp_confirmed);
8121 8137 goto out;
8122 8138 }
8123 8139
8124 8140 /*
8125 8141 * Creds match but the verifier doesn't. Must search
8126 8142 * for an unconfirmed client that would be replaced by
8127 8143 * this request.
8128 8144 */
8129 8145 create = FALSE;
8130 8146 cp_unconfirmed = rfs4_findclient(&args->client, &create,
8131 8147 cp_confirmed);
8132 8148 }
8133 8149
8134 8150 /*
8135 8151 * At this point, we have taken care of the brand new client
8136 8152 * struct, INUSE case, update of an existing, and confirmed
8137 8153 * client struct.
8138 8154 */
8139 8155
8140 8156 /*
8141 8157 * check to see if things have changed while we originally
8142 8158 * picked up the client struct. If they have, then return and
8143 8159 * retry the processing of this SETCLIENTID request.
8144 8160 */
8145 8161 if (cp_unconfirmed) {
8146 8162 rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8147 8163 if (!cp_unconfirmed->rc_need_confirm) {
8148 8164 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8149 8165 rfs4_client_rele(cp_unconfirmed);
8150 8166 if (cp_confirmed)
8151 8167 rfs4_client_rele(cp_confirmed);
8152 8168 goto retry;
8153 8169 }
8154 8170 /* do away with the old unconfirmed one */
8155 8171 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8156 8172 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8157 8173 rfs4_client_rele(cp_unconfirmed);
8158 8174 cp_unconfirmed = NULL;
8159 8175 }
8160 8176
8161 8177 /*
8162 8178 * This search will temporarily hide the confirmed client
8163 8179 * struct while a new client struct is created as the
8164 8180 * unconfirmed one.
8165 8181 */
8166 8182 create = TRUE;
8167 8183 newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8168 8184
8169 8185 ASSERT(newcp != NULL);
8170 8186
8171 8187 if (newcp == NULL) {
8172 8188 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8173 8189 rfs4_client_rele(cp_confirmed);
8174 8190 goto out;
8175 8191 }
8176 8192
8177 8193 /*
8178 8194 * If one was not created, then a similar request must be in
8179 8195 * process so release and start over with this one
8180 8196 */
8181 8197 if (create != TRUE) {
8182 8198 rfs4_client_rele(newcp);
8183 8199 if (cp_confirmed)
8184 8200 rfs4_client_rele(cp_confirmed);
8185 8201 goto retry;
8186 8202 }
8187 8203
8188 8204 *cs->statusp = res->status = NFS4_OK;
8189 8205 res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8190 8206 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8191 8207 newcp->rc_confirm_verf;
8192 8208 /* Setup callback information; CB_NULL confirmation later */
8193 8209 rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8194 8210
8195 8211 newcp->rc_cp_confirmed = cp_confirmed;
8196 8212
8197 8213 rfs4_client_rele(newcp);
8198 8214
8199 8215 out:
8200 8216 DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8201 8217 SETCLIENTID4res *, res);
8202 8218 }
8203 8219
8204 8220 /*ARGSUSED*/
8205 8221 void
8206 8222 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8207 8223 struct svc_req *req, struct compound_state *cs)
8208 8224 {
8209 8225 SETCLIENTID_CONFIRM4args *args =
8210 8226 &argop->nfs_argop4_u.opsetclientid_confirm;
8211 8227 SETCLIENTID_CONFIRM4res *res =
8212 8228 &resop->nfs_resop4_u.opsetclientid_confirm;
8213 8229 rfs4_client_t *cp, *cptoclose = NULL;
8214 8230 nfs4_srv_t *nsrv4;
8215 8231
8216 8232 DTRACE_NFSV4_2(op__setclientid__confirm__start,
8217 8233 struct compound_state *, cs,
8218 8234 SETCLIENTID_CONFIRM4args *, args);
8219 8235
8220 8236 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
8221 8237 *cs->statusp = res->status = NFS4_OK;
8222 8238
8223 8239 cp = rfs4_findclient_by_id(args->clientid, TRUE);
8224 8240
8225 8241 if (cp == NULL) {
8226 8242 *cs->statusp = res->status =
8227 8243 rfs4_check_clientid(&args->clientid, 1);
8228 8244 goto out;
8229 8245 }
8230 8246
8231 8247 if (!creds_ok(cp, req, cs)) {
8232 8248 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8233 8249 rfs4_client_rele(cp);
8234 8250 goto out;
8235 8251 }
8236 8252
8237 8253 /* If the verifier doesn't match, the record doesn't match */
8238 8254 if (cp->rc_confirm_verf != args->setclientid_confirm) {
8239 8255 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8240 8256 rfs4_client_rele(cp);
8241 8257 goto out;
8242 8258 }
8243 8259
8244 8260 rfs4_dbe_lock(cp->rc_dbe);
8245 8261 cp->rc_need_confirm = FALSE;
8246 8262 if (cp->rc_cp_confirmed) {
8247 8263 cptoclose = cp->rc_cp_confirmed;
8248 8264 cptoclose->rc_ss_remove = 1;
8249 8265 cp->rc_cp_confirmed = NULL;
8250 8266 }
8251 8267
8252 8268 /*
8253 8269 * Update the client's associated server instance, if it's changed
8254 8270 * since the client was created.
8255 8271 */
8256 8272 if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst)
8257 8273 rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
8258 8274
8259 8275 /*
8260 8276 * Record clientid in stable storage.
8261 8277 * Must be done after server instance has been assigned.
8262 8278 */
8263 8279 rfs4_ss_clid(nsrv4, cp);
8264 8280
8265 8281 rfs4_dbe_unlock(cp->rc_dbe);
8266 8282
8267 8283 if (cptoclose)
8268 8284 /* don't need to rele, client_close does it */
8269 8285 rfs4_client_close(cptoclose);
8270 8286
8271 8287 /* If needed, initiate CB_NULL call for callback path */
8272 8288 rfs4_deleg_cb_check(cp);
8273 8289 rfs4_update_lease(cp);
8274 8290
8275 8291 /*
8276 8292 * Check to see if client can perform reclaims
8277 8293 */
8278 8294 rfs4_ss_chkclid(nsrv4, cp);
8279 8295
8280 8296 rfs4_client_rele(cp);
8281 8297
8282 8298 out:
8283 8299 DTRACE_NFSV4_2(op__setclientid__confirm__done,
8284 8300 struct compound_state *, cs,
8285 8301 SETCLIENTID_CONFIRM4 *, res);
8286 8302 }
8287 8303
8288 8304
8289 8305 /*ARGSUSED*/
8290 8306 void
8291 8307 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8292 8308 struct svc_req *req, struct compound_state *cs)
8293 8309 {
8294 8310 CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8295 8311 CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8296 8312 rfs4_state_t *sp;
8297 8313 nfsstat4 status;
8298 8314
8299 8315 DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8300 8316 CLOSE4args *, args);
8301 8317
8302 8318 if (cs->vp == NULL) {
8303 8319 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8304 8320 goto out;
8305 8321 }
8306 8322
8307 8323 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8308 8324 if (status != NFS4_OK) {
8309 8325 *cs->statusp = resp->status = status;
8310 8326 goto out;
8311 8327 }
8312 8328
8313 8329 /* Ensure specified filehandle matches */
8314 8330 if (cs->vp != sp->rs_finfo->rf_vp) {
8315 8331 rfs4_state_rele(sp);
8316 8332 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8317 8333 goto out;
8318 8334 }
8319 8335
8320 8336 /* hold off other access to open_owner while we tinker */
8321 8337 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8322 8338
8323 8339 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
8324 8340 case NFS4_CHECK_STATEID_OKAY:
8325 8341 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8326 8342 resop) != NFS4_CHKSEQ_OKAY) {
8327 8343 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8328 8344 goto end;
8329 8345 }
8330 8346 break;
8331 8347 case NFS4_CHECK_STATEID_OLD:
8332 8348 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8333 8349 goto end;
8334 8350 case NFS4_CHECK_STATEID_BAD:
8335 8351 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8336 8352 goto end;
8337 8353 case NFS4_CHECK_STATEID_EXPIRED:
8338 8354 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8339 8355 goto end;
8340 8356 case NFS4_CHECK_STATEID_CLOSED:
8341 8357 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8342 8358 goto end;
8343 8359 case NFS4_CHECK_STATEID_UNCONFIRMED:
8344 8360 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8345 8361 goto end;
8346 8362 case NFS4_CHECK_STATEID_REPLAY:
8347 8363 /* Check the sequence id for the open owner */
8348 8364 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8349 8365 resop)) {
8350 8366 case NFS4_CHKSEQ_OKAY:
8351 8367 /*
8352 8368 * This is replayed stateid; if seqid matches
8353 8369 * next expected, then client is using wrong seqid.
8354 8370 */
8355 8371 /* FALL THROUGH */
8356 8372 case NFS4_CHKSEQ_BAD:
8357 8373 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8358 8374 goto end;
8359 8375 case NFS4_CHKSEQ_REPLAY:
8360 8376 /*
8361 8377 * Note this case is the duplicate case so
8362 8378 * resp->status is already set.
8363 8379 */
8364 8380 *cs->statusp = resp->status;
8365 8381 rfs4_update_lease(sp->rs_owner->ro_client);
8366 8382 goto end;
8367 8383 }
8368 8384 break;
8369 8385 default:
8370 8386 ASSERT(FALSE);
8371 8387 break;
8372 8388 }
8373 8389
8374 8390 rfs4_dbe_lock(sp->rs_dbe);
8375 8391
8376 8392 /* Update the stateid. */
8377 8393 next_stateid(&sp->rs_stateid);
8378 8394 resp->open_stateid = sp->rs_stateid.stateid;
8379 8395
8380 8396 rfs4_dbe_unlock(sp->rs_dbe);
8381 8397
8382 8398 rfs4_update_lease(sp->rs_owner->ro_client);
8383 8399 rfs4_update_open_sequence(sp->rs_owner);
8384 8400 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8385 8401
8386 8402 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8387 8403
8388 8404 *cs->statusp = resp->status = status;
8389 8405
8390 8406 end:
8391 8407 rfs4_sw_exit(&sp->rs_owner->ro_sw);
8392 8408 rfs4_state_rele(sp);
8393 8409 out:
8394 8410 DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8395 8411 CLOSE4res *, resp);
8396 8412 }
8397 8413
8398 8414 /*
8399 8415 * Manage the counts on the file struct and close all file locks
8400 8416 */
8401 8417 /*ARGSUSED*/
8402 8418 void
8403 8419 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8404 8420 bool_t close_of_client)
8405 8421 {
8406 8422 rfs4_file_t *fp = sp->rs_finfo;
8407 8423 rfs4_lo_state_t *lsp;
8408 8424 int fflags = 0;
8409 8425
8410 8426 /*
8411 8427 * If this call is part of the larger closing down of client
8412 8428 * state then it is just easier to release all locks
8413 8429 * associated with this client instead of going through each
8414 8430 * individual file and cleaning locks there.
8415 8431 */
8416 8432 if (close_of_client) {
8417 8433 if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8418 8434 !list_is_empty(&sp->rs_lostatelist) &&
8419 8435 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8420 8436 /* Is the PxFS kernel module loaded? */
8421 8437 if (lm_remove_file_locks != NULL) {
8422 8438 int new_sysid;
8423 8439
8424 8440 /* Encode the cluster nodeid in new sysid */
8425 8441 new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8426 8442 lm_set_nlmid_flk(&new_sysid);
8427 8443
8428 8444 /*
8429 8445 * This PxFS routine removes file locks for a
8430 8446 * client over all nodes of a cluster.
8431 8447 */
8432 8448 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8433 8449 "lm_remove_file_locks(sysid=0x%x)\n",
8434 8450 new_sysid));
8435 8451 (*lm_remove_file_locks)(new_sysid);
8436 8452 } else {
8437 8453 struct flock64 flk;
8438 8454
8439 8455 /* Release all locks for this client */
8440 8456 flk.l_type = F_UNLKSYS;
8441 8457 flk.l_whence = 0;
8442 8458 flk.l_start = 0;
8443 8459 flk.l_len = 0;
8444 8460 flk.l_sysid =
8445 8461 sp->rs_owner->ro_client->rc_sysidt;
8446 8462 flk.l_pid = 0;
8447 8463 (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8448 8464 &flk, F_REMOTELOCK | FREAD | FWRITE,
8449 8465 (u_offset_t)0, NULL, CRED(), NULL);
8450 8466 }
8451 8467
8452 8468 sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8453 8469 }
8454 8470 }
8455 8471
8456 8472 /*
8457 8473 * Release all locks on this file by this lock owner or at
8458 8474 * least mark the locks as having been released
8459 8475 */
8460 8476 for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8461 8477 lsp = list_next(&sp->rs_lostatelist, lsp)) {
8462 8478 lsp->rls_locks_cleaned = TRUE;
8463 8479
8464 8480 /* Was this already taken care of above? */
8465 8481 if (!close_of_client &&
8466 8482 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8467 8483 (void) cleanlocks(sp->rs_finfo->rf_vp,
8468 8484 lsp->rls_locker->rl_pid,
8469 8485 lsp->rls_locker->rl_client->rc_sysidt);
8470 8486 }
8471 8487
8472 8488 /*
8473 8489 * Release any shrlocks associated with this open state ID.
8474 8490 * This must be done before the rfs4_state gets marked closed.
8475 8491 */
8476 8492 if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8477 8493 (void) rfs4_unshare(sp);
8478 8494
8479 8495 if (sp->rs_open_access) {
8480 8496 rfs4_dbe_lock(fp->rf_dbe);
8481 8497
8482 8498 /*
8483 8499 * Decrement the count for each access and deny bit that this
8484 8500 * state has contributed to the file.
8485 8501 * If the file counts go to zero
8486 8502 * clear the appropriate bit in the appropriate mask.
8487 8503 */
8488 8504 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8489 8505 fp->rf_access_read--;
8490 8506 fflags |= FREAD;
8491 8507 if (fp->rf_access_read == 0)
8492 8508 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8493 8509 }
8494 8510 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8495 8511 fp->rf_access_write--;
8496 8512 fflags |= FWRITE;
8497 8513 if (fp->rf_access_write == 0)
8498 8514 fp->rf_share_access &=
8499 8515 ~OPEN4_SHARE_ACCESS_WRITE;
8500 8516 }
8501 8517 if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8502 8518 fp->rf_deny_read--;
8503 8519 if (fp->rf_deny_read == 0)
8504 8520 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8505 8521 }
8506 8522 if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8507 8523 fp->rf_deny_write--;
8508 8524 if (fp->rf_deny_write == 0)
8509 8525 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8510 8526 }
8511 8527
8512 8528 (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8513 8529
8514 8530 rfs4_dbe_unlock(fp->rf_dbe);
8515 8531
8516 8532 sp->rs_open_access = 0;
8517 8533 sp->rs_open_deny = 0;
8518 8534 }
8519 8535 }
8520 8536
8521 8537 /*
8522 8538 * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8523 8539 */
8524 8540 static nfsstat4
8525 8541 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8526 8542 {
8527 8543 rfs4_lockowner_t *lo;
8528 8544 rfs4_client_t *cp;
8529 8545 uint32_t len;
8530 8546
8531 8547 lo = rfs4_findlockowner_by_pid(flk->l_pid);
8532 8548 if (lo != NULL) {
8533 8549 cp = lo->rl_client;
8534 8550 if (rfs4_lease_expired(cp)) {
8535 8551 rfs4_lockowner_rele(lo);
8536 8552 rfs4_dbe_hold(cp->rc_dbe);
8537 8553 rfs4_client_close(cp);
8538 8554 return (NFS4ERR_EXPIRED);
8539 8555 }
8540 8556 dp->owner.clientid = lo->rl_owner.clientid;
8541 8557 len = lo->rl_owner.owner_len;
8542 8558 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8543 8559 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8544 8560 dp->owner.owner_len = len;
8545 8561 rfs4_lockowner_rele(lo);
8546 8562 goto finish;
8547 8563 }
8548 8564
8549 8565 /*
8550 8566 * Its not a NFS4 lock. We take advantage that the upper 32 bits
8551 8567 * of the client id contain the boot time for a NFS4 lock. So we
8552 8568 * fabricate and identity by setting clientid to the sysid, and
8553 8569 * the lock owner to the pid.
8554 8570 */
8555 8571 dp->owner.clientid = flk->l_sysid;
8556 8572 len = sizeof (pid_t);
8557 8573 dp->owner.owner_len = len;
8558 8574 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8559 8575 bcopy(&flk->l_pid, dp->owner.owner_val, len);
8560 8576 finish:
8561 8577 dp->offset = flk->l_start;
8562 8578 dp->length = flk->l_len;
8563 8579
8564 8580 if (flk->l_type == F_RDLCK)
8565 8581 dp->locktype = READ_LT;
8566 8582 else if (flk->l_type == F_WRLCK)
8567 8583 dp->locktype = WRITE_LT;
8568 8584 else
8569 8585 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8570 8586
8571 8587 return (NFS4_OK);
8572 8588 }
8573 8589
8574 8590 /*
8575 8591 * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8576 8592 * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a
8577 8593 * case the lock is denied by the NFSv4.0 server. NFSv4.0 clients are prepared
8578 8594 * for that (obviously); they are sending the LOCK requests with some delays
8579 8595 * between the attempts. See nfs4frlock() and nfs4_block_and_wait() for the
8580 8596 * locking and delay implementation at the client side.
8581 8597 *
8582 8598 * To make the life of the clients easier, the NFSv4.0 server tries to do some
8583 8599 * fast retries on its own (the for loop below) in a hope the lock will be
8584 8600 * available soon. And if not, the client won't need to resend the LOCK
8585 8601 * requests so fast to check the lock availability. This basically saves some
8586 8602 * network traffic and tries to make sure the client gets the lock ASAP.
8587 8603 */
8588 8604 static int
8589 8605 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8590 8606 {
8591 8607 int error;
8592 8608 struct flock64 flk;
8593 8609 int i;
8594 8610 clock_t delaytime;
8595 8611 int cmd;
8596 8612 int spin_cnt = 0;
8597 8613
8598 8614 cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8599 8615 retry:
8600 8616 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8601 8617
8602 8618 for (i = 0; i < rfs4_maxlock_tries; i++) {
8603 8619 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8604 8620 error = VOP_FRLOCK(vp, cmd,
8605 8621 flock, flag, (u_offset_t)0, NULL, cred, NULL);
8606 8622
8607 8623 if (error != EAGAIN && error != EACCES)
8608 8624 break;
8609 8625
8610 8626 if (i < rfs4_maxlock_tries - 1) {
8611 8627 delay(delaytime);
8612 8628 delaytime *= 2;
8613 8629 }
8614 8630 }
8615 8631
8616 8632 if (error == EAGAIN || error == EACCES) {
8617 8633 /* Get the owner of the lock */
8618 8634 flk = *flock;
8619 8635 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8620 8636 if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8621 8637 NULL) == 0) {
8622 8638 /*
8623 8639 * There's a race inherent in the current VOP_FRLOCK
8624 8640 * design where:
8625 8641 * a: "other guy" takes a lock that conflicts with a
8626 8642 * lock we want
8627 8643 * b: we attempt to take our lock (non-blocking) and
8628 8644 * the attempt fails.
8629 8645 * c: "other guy" releases the conflicting lock
8630 8646 * d: we ask what lock conflicts with the lock we want,
8631 8647 * getting F_UNLCK (no lock blocks us)
8632 8648 *
8633 8649 * If we retry the non-blocking lock attempt in this
8634 8650 * case (restart at step 'b') there's some possibility
8635 8651 * that many such attempts might fail. However a test
8636 8652 * designed to actually provoke this race shows that
8637 8653 * the vast majority of cases require no retry, and
8638 8654 * only a few took as many as three retries. Here's
8639 8655 * the test outcome:
8640 8656 *
8641 8657 * number of retries how many times we needed
8642 8658 * that many retries
8643 8659 * 0 79461
8644 8660 * 1 862
8645 8661 * 2 49
8646 8662 * 3 5
8647 8663 *
8648 8664 * Given those empirical results, we arbitrarily limit
8649 8665 * the retry count to ten.
8650 8666 *
8651 8667 * If we actually make to ten retries and give up,
8652 8668 * nothing catastrophic happens, but we're unable to
8653 8669 * return the information about the conflicting lock to
8654 8670 * the NFS client. That's an acceptable trade off vs.
8655 8671 * letting this retry loop run forever.
8656 8672 */
8657 8673 if (flk.l_type == F_UNLCK) {
8658 8674 if (spin_cnt++ < 10) {
8659 8675 /* No longer locked, retry */
8660 8676 goto retry;
8661 8677 }
8662 8678 } else {
8663 8679 *flock = flk;
8664 8680 LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8665 8681 F_GETLK, &flk);
8666 8682 }
8667 8683 }
8668 8684 }
8669 8685
8670 8686 return (error);
8671 8687 }
8672 8688
8673 8689 /*ARGSUSED*/
8674 8690 static nfsstat4
8675 8691 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8676 8692 offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8677 8693 {
8678 8694 nfsstat4 status;
8679 8695 rfs4_lockowner_t *lo = lsp->rls_locker;
8680 8696 rfs4_state_t *sp = lsp->rls_state;
8681 8697 struct flock64 flock;
8682 8698 int16_t ltype;
8683 8699 int flag;
8684 8700 int error;
8685 8701 sysid_t sysid;
8686 8702 LOCK4res *lres;
8687 8703 vnode_t *vp;
8688 8704
8689 8705 if (rfs4_lease_expired(lo->rl_client)) {
8690 8706 return (NFS4ERR_EXPIRED);
8691 8707 }
8692 8708
8693 8709 if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8694 8710 return (status);
8695 8711
8696 8712 /* Check for zero length. To lock to end of file use all ones for V4 */
8697 8713 if (length == 0)
8698 8714 return (NFS4ERR_INVAL);
8699 8715 else if (length == (length4)(~0))
8700 8716 length = 0; /* Posix to end of file */
8701 8717
8702 8718 retry:
8703 8719 rfs4_dbe_lock(sp->rs_dbe);
8704 8720 if (sp->rs_closed == TRUE) {
8705 8721 rfs4_dbe_unlock(sp->rs_dbe);
8706 8722 return (NFS4ERR_OLD_STATEID);
8707 8723 }
8708 8724
8709 8725 if (resop->resop != OP_LOCKU) {
8710 8726 switch (locktype) {
8711 8727 case READ_LT:
8712 8728 case READW_LT:
8713 8729 if ((sp->rs_share_access
8714 8730 & OPEN4_SHARE_ACCESS_READ) == 0) {
8715 8731 rfs4_dbe_unlock(sp->rs_dbe);
8716 8732
8717 8733 return (NFS4ERR_OPENMODE);
8718 8734 }
8719 8735 ltype = F_RDLCK;
8720 8736 break;
8721 8737 case WRITE_LT:
8722 8738 case WRITEW_LT:
8723 8739 if ((sp->rs_share_access
8724 8740 & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8725 8741 rfs4_dbe_unlock(sp->rs_dbe);
8726 8742
8727 8743 return (NFS4ERR_OPENMODE);
8728 8744 }
8729 8745 ltype = F_WRLCK;
8730 8746 break;
8731 8747 }
8732 8748 } else
8733 8749 ltype = F_UNLCK;
8734 8750
8735 8751 flock.l_type = ltype;
8736 8752 flock.l_whence = 0; /* SEEK_SET */
8737 8753 flock.l_start = offset;
8738 8754 flock.l_len = length;
8739 8755 flock.l_sysid = sysid;
8740 8756 flock.l_pid = lsp->rls_locker->rl_pid;
8741 8757
8742 8758 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8743 8759 if (flock.l_len < 0 || flock.l_start < 0) {
8744 8760 rfs4_dbe_unlock(sp->rs_dbe);
8745 8761 return (NFS4ERR_INVAL);
8746 8762 }
8747 8763
8748 8764 /*
8749 8765 * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
8750 8766 * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
8751 8767 */
8752 8768 flag = (int)sp->rs_share_access | F_REMOTELOCK;
8753 8769
8754 8770 vp = sp->rs_finfo->rf_vp;
8755 8771 VN_HOLD(vp);
8756 8772
8757 8773 /*
8758 8774 * We need to unlock sp before we call the underlying filesystem to
8759 8775 * acquire the file lock.
8760 8776 */
8761 8777 rfs4_dbe_unlock(sp->rs_dbe);
8762 8778
8763 8779 error = setlock(vp, &flock, flag, cred);
8764 8780
8765 8781 /*
8766 8782 * Make sure the file is still open. In a case the file was closed in
8767 8783 * the meantime, clean the lock we acquired using the setlock() call
8768 8784 * above, and return the appropriate error.
8769 8785 */
8770 8786 rfs4_dbe_lock(sp->rs_dbe);
8771 8787 if (sp->rs_closed == TRUE) {
8772 8788 cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
8773 8789 rfs4_dbe_unlock(sp->rs_dbe);
8774 8790
8775 8791 VN_RELE(vp);
8776 8792
8777 8793 return (NFS4ERR_OLD_STATEID);
8778 8794 }
8779 8795 rfs4_dbe_unlock(sp->rs_dbe);
8780 8796
8781 8797 VN_RELE(vp);
8782 8798
8783 8799 if (error == 0) {
8784 8800 rfs4_dbe_lock(lsp->rls_dbe);
8785 8801 next_stateid(&lsp->rls_lockid);
8786 8802 rfs4_dbe_unlock(lsp->rls_dbe);
8787 8803 }
8788 8804
8789 8805 /*
8790 8806 * N.B. We map error values to nfsv4 errors. This is differrent
8791 8807 * than puterrno4 routine.
8792 8808 */
8793 8809 switch (error) {
8794 8810 case 0:
8795 8811 status = NFS4_OK;
8796 8812 break;
8797 8813 case EAGAIN:
8798 8814 case EACCES: /* Old value */
8799 8815 /* Can only get here if op is OP_LOCK */
8800 8816 ASSERT(resop->resop == OP_LOCK);
8801 8817 lres = &resop->nfs_resop4_u.oplock;
8802 8818 status = NFS4ERR_DENIED;
8803 8819 if (lock_denied(&lres->LOCK4res_u.denied, &flock)
8804 8820 == NFS4ERR_EXPIRED)
8805 8821 goto retry;
8806 8822 break;
8807 8823 case ENOLCK:
8808 8824 status = NFS4ERR_DELAY;
8809 8825 break;
8810 8826 case EOVERFLOW:
8811 8827 status = NFS4ERR_INVAL;
8812 8828 break;
8813 8829 case EINVAL:
8814 8830 status = NFS4ERR_NOTSUPP;
8815 8831 break;
8816 8832 default:
8817 8833 status = NFS4ERR_SERVERFAULT;
8818 8834 break;
8819 8835 }
8820 8836
8821 8837 return (status);
8822 8838 }
8823 8839
/*
 * Handler for the NFSv4 LOCK operation.
 *
 * There are two entry paths, selected by args->locker.new_lock_owner:
 *
 * - new lock owner: the request carries an open stateid.  The open state
 *   (sp) is looked up and serialized via the open owner's ro_sw, the open
 *   seqid is checked, and a lock owner (lo) plus lock state (lsp) is found
 *   or created.  A replayed request is answered from the saved reply.
 *
 * - existing lock owner: the request carries a lock stateid.  The lock
 *   state (lsp) is looked up directly and the lock seqid is checked.
 *
 * Both paths then share the vnode type and grace period checks and call
 * rfs4_do_lock().  The "out" label records the result in the reply and in
 * the saved responses; the "end" label drops the serialization and the
 * references taken on the way.  The result is stored in resp->status and
 * *cs->statusp.
 */
8824 8840 /*ARGSUSED*/
8825 8841 void
8826 8842 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
8827 8843 struct svc_req *req, struct compound_state *cs)
8828 8844 {
8829 8845 LOCK4args *args = &argop->nfs_argop4_u.oplock;
8830 8846 LOCK4res *resp = &resop->nfs_resop4_u.oplock;
8831 8847 nfsstat4 status;
8832 8848 stateid4 *stateid;
8833 8849 rfs4_lockowner_t *lo;
8834 8850 rfs4_client_t *cp;
8835 8851 rfs4_state_t *sp = NULL;
8836 8852 rfs4_lo_state_t *lsp = NULL;
8837 8853 bool_t ls_sw_held = FALSE;
8838 8854 bool_t create = TRUE;
8839 8855 bool_t lcreate = TRUE;
8840 8856 bool_t dup_lock = FALSE;
8841 8857 int rc;
8842 8858
8843 8859 DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
8844 8860 LOCK4args *, args);
8845 8861
8846 8862 if (cs->vp == NULL) {
8847 8863 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8848 8864 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8849 8865 cs, LOCK4res *, resp);
8850 8866 return;
8851 8867 }
8852 8868
8853 8869 if (args->locker.new_lock_owner) {
8854 8870 /* Create a new lockowner for this instance */
8855 8871 open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
8856 8872
8857 8873 NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
8858 8874
8859 8875 stateid = &olo->open_stateid;
8860 8876 status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
8861 8877 if (status != NFS4_OK) {
8862 8878 NFS4_DEBUG(rfs4_debug,
8863 8879 (CE_NOTE, "Get state failed in lock %d", status));
8864 8880 *cs->statusp = resp->status = status;
8865 8881 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8866 8882 cs, LOCK4res *, resp);
8867 8883 return;
8868 8884 }
8869 8885
8870 8886 /* Ensure specified filehandle matches */
8871 8887 if (cs->vp != sp->rs_finfo->rf_vp) {
8872 8888 rfs4_state_rele(sp);
8873 8889 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8874 8890 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8875 8891 cs, LOCK4res *, resp);
8876 8892 return;
8877 8893 }
8878 8894
8879 8895 /* hold off other access to open_owner while we tinker */
8880 8896 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8881 8897
/*
 * From here on every failure leaves through "end", which exits ro_sw and
 * releases sp.
 */
8882 8898 switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
8883 8899 case NFS4_CHECK_STATEID_OLD:
8884 8900 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8885 8901 goto end;
8886 8902 case NFS4_CHECK_STATEID_BAD:
8887 8903 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8888 8904 goto end;
8889 8905 case NFS4_CHECK_STATEID_EXPIRED:
8890 8906 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8891 8907 goto end;
8892 8908 case NFS4_CHECK_STATEID_UNCONFIRMED:
8893 8909 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8894 8910 goto end;
8895 8911 case NFS4_CHECK_STATEID_CLOSED:
8896 8912 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8897 8913 goto end;
8898 8914 case NFS4_CHECK_STATEID_OKAY:
8899 8915 case NFS4_CHECK_STATEID_REPLAY:
8900 8916 switch (rfs4_check_olo_seqid(olo->open_seqid,
8901 8917 sp->rs_owner, resop)) {
8902 8918 case NFS4_CHKSEQ_OKAY:
8903 8919 if (rc == NFS4_CHECK_STATEID_OKAY)
8904 8920 break;
8905 8921 /*
8906 8922 * This is replayed stateid; if seqid
8907 8923 * matches next expected, then client
8908 8924 * is using wrong seqid.
8909 8925 */
8910 8926 /* FALLTHROUGH */
8911 8927 case NFS4_CHKSEQ_BAD:
8912 8928 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8913 8929 goto end;
8914 8930 case NFS4_CHKSEQ_REPLAY:
8915 8931 /* This is a duplicate LOCK request */
8916 8932 dup_lock = TRUE;
8917 8933
8918 8934 /*
8919 8935 * For a duplicate we do not want to
8920 8936 * create a new lockowner as it should
8921 8937 * already exist.
8922 8938 * Turn off the lockowner create flag.
8923 8939 */
8924 8940 lcreate = FALSE;
8925 8941 }
8926 8942 break;
8927 8943 }
8928 8944
8929 8945 lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
8930 8946 if (lo == NULL) {
8931 8947 NFS4_DEBUG(rfs4_debug,
8932 8948 (CE_NOTE, "rfs4_op_lock: no lock owner"));
8933 8949 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
8934 8950 goto end;
8935 8951 }
8936 8952
8937 8953 lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
8938 8954 if (lsp == NULL) {
8939 8955 rfs4_update_lease(sp->rs_owner->ro_client);
8940 8956 /*
8941 8957 * Only update the open_seqid if this is not
8942 8958 * a duplicate request
8943 8959 */
8944 8960 if (dup_lock == FALSE) {
8945 8961 rfs4_update_open_sequence(sp->rs_owner);
8946 8962 }
8947 8963
8948 8964 NFS4_DEBUG(rfs4_debug,
8949 8965 (CE_NOTE, "rfs4_op_lock: no state"));
8950 8966 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8951 8967 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8952 8968 rfs4_lockowner_rele(lo);
8953 8969 goto end;
8954 8970 }
8955 8971
8956 8972 /*
8957 8973 * This is the new_lock_owner branch and the client is
8958 8974 * supposed to be associating a new lock_owner with
8959 8975 * the open file at this point. If we find that a
8960 8976 * lock_owner/state association already exists and a
8961 8977 * successful LOCK request was returned to the client,
8962 8978 * an error is returned to the client since this is
8963 8979 * not appropriate. The client should be using the
8964 8980 * existing lock_owner branch.
8965 8981 */
8966 8982 if (dup_lock == FALSE && create == FALSE) {
8967 8983 if (lsp->rls_lock_completed == TRUE) {
8968 8984 *cs->statusp =
8969 8985 resp->status = NFS4ERR_BAD_SEQID;
8970 8986 rfs4_lockowner_rele(lo);
8971 8987 goto end;
8972 8988 }
8973 8989 }
8974 8990
8975 8991 rfs4_update_lease(sp->rs_owner->ro_client);
8976 8992
8977 8993 /*
8978 8994 * Only update the open_seqid if this is not
8979 8995 * a duplicate request
8980 8996 */
8981 8997 if (dup_lock == FALSE) {
8982 8998 rfs4_update_open_sequence(sp->rs_owner);
8983 8999 }
8984 9000
8985 9001 /*
8986 9002 * If this is a duplicate lock request, just copy the
8987 9003 * previously saved reply and return.
8988 9004 */
8989 9005 if (dup_lock == TRUE) {
8990 9006 /* verify that lock_seqid's match */
8991 9007 if (lsp->rls_seqid != olo->lock_seqid) {
8992 9008 NFS4_DEBUG(rfs4_debug,
8993 9009 (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
8994 9010 "lsp->seqid=%d old->seqid=%d",
8995 9011 lsp->rls_seqid, olo->lock_seqid));
8996 9012 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8997 9013 } else {
8998 9014 rfs4_copy_reply(resop, &lsp->rls_reply);
8999 9015 /*
9000 9016 * Make sure to copy the just
9001 9017 * retrieved reply status into the
9002 9018 * overall compound status
9003 9019 */
9004 9020 *cs->statusp = resp->status;
9005 9021 }
9006 9022 rfs4_lockowner_rele(lo);
9007 9023 goto end;
9008 9024 }
9009 9025
9010 9026 rfs4_dbe_lock(lsp->rls_dbe);
9011 9027
9012 9028 /* Make sure to update the lock sequence id */
9013 9029 lsp->rls_seqid = olo->lock_seqid;
9014 9030
9015 9031 NFS4_DEBUG(rfs4_debug,
9016 9032 (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
9017 9033
9018 9034 /*
9019 9035 * This is used to signify the newly created lockowner
9020 9036 * stateid and its sequence number. The checks for
9021 9037 * sequence number and increment don't occur on the
9022 9038 * very first lock request for a lockowner.
9023 9039 */
9024 9040 lsp->rls_skip_seqid_check = TRUE;
9025 9041
9026 9042 /* hold off other access to lsp while we tinker */
9027 9043 rfs4_sw_enter(&lsp->rls_sw);
9028 9044 ls_sw_held = TRUE;
9029 9045
9030 9046 rfs4_dbe_unlock(lsp->rls_dbe);
9031 9047
9032 9048 rfs4_lockowner_rele(lo);
9033 9049 } else {
9034 9050 stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
9035 9051 /* get lsp and hold the lock on the underlying file struct */
9036 9052 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
9037 9053 != NFS4_OK) {
9038 9054 *cs->statusp = resp->status = status;
9039 9055 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9040 9056 cs, LOCK4res *, resp);
9041 9057 return;
9042 9058 }
9043 9059 create = FALSE; /* We didn't create lsp */
9044 9060
9045 9061 /* Ensure specified filehandle matches */
9046 9062 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9047 9063 rfs4_lo_state_rele(lsp, TRUE);
9048 9064 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9049 9065 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9050 9066 cs, LOCK4res *, resp);
9051 9067 return;
9052 9068 }
9053 9069
9054 9070 /* hold off other access to lsp while we tinker */
9055 9071 rfs4_sw_enter(&lsp->rls_sw);
9056 9072 ls_sw_held = TRUE;
9057 9073
9058 9074 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9059 9075 /*
9060 9076 * The stateid looks like it was okay (expected to be
9061 9077 * the next one)
9062 9078 */
9063 9079 case NFS4_CHECK_STATEID_OKAY:
9064 9080 /*
9065 9081 * The sequence id is now checked. Determine
9066 9082 * if this is a replay or if it is in the
9067 9083 * expected (next) sequence. In the case of a
9068 9084 * replay, there are two replay conditions
9069 9085 * that may occur. The first is the normal
9070 9086 * condition where a LOCK is done with a
9071 9087 * NFS4_OK response and the stateid is
9072 9088 * updated. That case is handled below when
9073 9089 * the stateid is identified as a REPLAY. The
9074 9090 * second is the case where an error is
9075 9091 * returned, like NFS4ERR_DENIED, and the
9076 9092 * sequence number is updated but the stateid
9077 9093 * is not updated. This second case is dealt
9078 9094 * with here. So it may seem odd that the
9079 9095 * stateid is okay but the sequence id is a
9080 9096 * replay but it is okay.
9081 9097 */
9082 9098 switch (rfs4_check_lock_seqid(
9083 9099 args->locker.locker4_u.lock_owner.lock_seqid,
9084 9100 lsp, resop)) {
9085 9101 case NFS4_CHKSEQ_REPLAY:
9086 9102 if (resp->status != NFS4_OK) {
9087 9103 /*
9088 9104 * Here is our replay and need
9089 9105 * to verify that the last
9090 9106 * response was an error.
9091 9107 */
9092 9108 *cs->statusp = resp->status;
9093 9109 goto end;
9094 9110 }
9095 9111 /*
9096 9112 * This is done since the sequence id
9097 9113 * looked like a replay but it didn't
9098 9114 * pass our check so a BAD_SEQID is
9099 9115 * returned as a result.
9100 9116 */
9101 9117 /*FALLTHROUGH*/
9102 9118 case NFS4_CHKSEQ_BAD:
9103 9119 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9104 9120 goto end;
9105 9121 case NFS4_CHKSEQ_OKAY:
9106 9122 /* Everything looks okay move ahead */
9107 9123 break;
9108 9124 }
9109 9125 break;
9110 9126 case NFS4_CHECK_STATEID_OLD:
9111 9127 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9112 9128 goto end;
9113 9129 case NFS4_CHECK_STATEID_BAD:
9114 9130 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9115 9131 goto end;
9116 9132 case NFS4_CHECK_STATEID_EXPIRED:
9117 9133 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9118 9134 goto end;
9119 9135 case NFS4_CHECK_STATEID_CLOSED:
9120 9136 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9121 9137 goto end;
9122 9138 case NFS4_CHECK_STATEID_REPLAY:
9123 9139 switch (rfs4_check_lock_seqid(
9124 9140 args->locker.locker4_u.lock_owner.lock_seqid,
9125 9141 lsp, resop)) {
9126 9142 case NFS4_CHKSEQ_OKAY:
9127 9143 /*
9128 9144 * This is a replayed stateid; if
9129 9145 * seqid matches the next expected,
9130 9146 * then client is using wrong seqid.
9131 9147 */
/* FALLTHROUGH */
9132 9148 case NFS4_CHKSEQ_BAD:
9133 9149 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9134 9150 goto end;
9135 9151 case NFS4_CHKSEQ_REPLAY:
9136 9152 rfs4_update_lease(lsp->rls_locker->rl_client);
9137 9153 *cs->statusp = status = resp->status;
9138 9154 goto end;
9139 9155 }
9140 9156 break;
9141 9157 default:
9142 9158 ASSERT(FALSE);
9143 9159 break;
9144 9160 }
9145 9161
9146 9162 rfs4_update_lock_sequence(lsp);
9147 9163 rfs4_update_lease(lsp->rls_locker->rl_client);
9148 9164 }
9149 9165
/*
 * Common part for both paths.  lsp is valid and its rls_sw is held
 * (ls_sw_held is TRUE) whenever control reaches this point.
 */
9150 9166 /*
9151 9167 * NFS4 only allows locking on regular files, so
9152 9168 * verify type of object.
9153 9169 */
9154 9170 if (cs->vp->v_type != VREG) {
9155 9171 if (cs->vp->v_type == VDIR)
9156 9172 status = NFS4ERR_ISDIR;
9157 9173 else
9158 9174 status = NFS4ERR_INVAL;
9159 9175 goto out;
9160 9176 }
9161 9177
9162 9178 cp = lsp->rls_state->rs_owner->ro_client;
9163 9179
/*
 * Grace period rules as coded below: inside the grace period only reclaim
 * requests from a client with rc_can_reclaim set are let through; outside
 * the grace period reclaim requests are refused.
 */
9164 9180 if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9165 9181 status = NFS4ERR_GRACE;
9166 9182 goto out;
9167 9183 }
9168 9184
9169 9185 if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9170 9186 status = NFS4ERR_NO_GRACE;
9171 9187 goto out;
9172 9188 }
9173 9189
9174 9190 if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9175 9191 status = NFS4ERR_NO_GRACE;
9176 9192 goto out;
9177 9193 }
9178 9194
9179 9195 if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9180 9196 cs->deleg = TRUE;
9181 9197
9182 9198 status = rfs4_do_lock(lsp, args->locktype,
9183 9199 args->offset, args->length, cs->cr, resop);
9184 9200
9185 9201 out:
9186 9202 lsp->rls_skip_seqid_check = FALSE;
9187 9203
9188 9204 *cs->statusp = resp->status = status;
9189 9205
9190 9206 if (status == NFS4_OK) {
9191 9207 resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9192 9208 lsp->rls_lock_completed = TRUE;
9193 9209 }
9194 9210 /*
9195 9211 * Only update the "OPEN" response here if this was a new
9196 9212 * lock_owner
9197 9213 */
9198 9214 if (sp)
9199 9215 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9200 9216
9201 9217 rfs4_update_lock_resp(lsp, resop);
9202 9218
9203 9219 end:
9204 9220 if (lsp) {
9205 9221 if (ls_sw_held)
9206 9222 rfs4_sw_exit(&lsp->rls_sw);
9207 9223 /*
9208 9224 * If an sp obtained, then the lsp does not represent
9209 9225 * a lock on the file struct.
9210 9226 */
9211 9227 if (sp != NULL)
9212 9228 rfs4_lo_state_rele(lsp, FALSE);
9213 9229 else
9214 9230 rfs4_lo_state_rele(lsp, TRUE);
9215 9231 }
9216 9232 if (sp) {
9217 9233 rfs4_sw_exit(&sp->rs_owner->ro_sw);
9218 9234 rfs4_state_rele(sp);
9219 9235 }
9220 9236
9221 9237 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9222 9238 LOCK4res *, resp);
9223 9239 }
9224 9240
9225 9241 /* free function for LOCK/LOCKT */
9226 9242 static void
9227 9243 lock_denied_free(nfs_resop4 *resop)
9228 9244 {
9229 9245 LOCK4denied *dp = NULL;
9230 9246
9231 9247 switch (resop->resop) {
9232 9248 case OP_LOCK:
9233 9249 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9234 9250 dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9235 9251 break;
9236 9252 case OP_LOCKT:
9237 9253 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9238 9254 dp = &resop->nfs_resop4_u.oplockt.denied;
9239 9255 break;
9240 9256 default:
9241 9257 break;
9242 9258 }
9243 9259
9244 9260 if (dp)
9245 9261 kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9246 9262 }
9247 9263
9248 9264 /*ARGSUSED*/
9249 9265 void
9250 9266 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
9251 9267 struct svc_req *req, struct compound_state *cs)
9252 9268 {
9253 9269 LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9254 9270 LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9255 9271 nfsstat4 status;
9256 9272 stateid4 *stateid = &args->lock_stateid;
9257 9273 rfs4_lo_state_t *lsp;
9258 9274
9259 9275 DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9260 9276 LOCKU4args *, args);
9261 9277
9262 9278 if (cs->vp == NULL) {
9263 9279 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9264 9280 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9265 9281 LOCKU4res *, resp);
9266 9282 return;
9267 9283 }
9268 9284
9269 9285 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9270 9286 *cs->statusp = resp->status = status;
9271 9287 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9272 9288 LOCKU4res *, resp);
9273 9289 return;
9274 9290 }
9275 9291
9276 9292 /* Ensure specified filehandle matches */
9277 9293 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9278 9294 rfs4_lo_state_rele(lsp, TRUE);
9279 9295 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9280 9296 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9281 9297 LOCKU4res *, resp);
9282 9298 return;
9283 9299 }
9284 9300
9285 9301 /* hold off other access to lsp while we tinker */
9286 9302 rfs4_sw_enter(&lsp->rls_sw);
9287 9303
9288 9304 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9289 9305 case NFS4_CHECK_STATEID_OKAY:
9290 9306 if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9291 9307 != NFS4_CHKSEQ_OKAY) {
9292 9308 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9293 9309 goto end;
9294 9310 }
9295 9311 break;
9296 9312 case NFS4_CHECK_STATEID_OLD:
9297 9313 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9298 9314 goto end;
9299 9315 case NFS4_CHECK_STATEID_BAD:
9300 9316 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9301 9317 goto end;
9302 9318 case NFS4_CHECK_STATEID_EXPIRED:
9303 9319 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9304 9320 goto end;
9305 9321 case NFS4_CHECK_STATEID_CLOSED:
9306 9322 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9307 9323 goto end;
9308 9324 case NFS4_CHECK_STATEID_REPLAY:
9309 9325 switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9310 9326 case NFS4_CHKSEQ_OKAY:
9311 9327 /*
9312 9328 * This is a replayed stateid; if
9313 9329 * seqid matches the next expected,
9314 9330 * then client is using wrong seqid.
9315 9331 */
9316 9332 case NFS4_CHKSEQ_BAD:
9317 9333 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9318 9334 goto end;
9319 9335 case NFS4_CHKSEQ_REPLAY:
9320 9336 rfs4_update_lease(lsp->rls_locker->rl_client);
9321 9337 *cs->statusp = status = resp->status;
9322 9338 goto end;
9323 9339 }
9324 9340 break;
9325 9341 default:
9326 9342 ASSERT(FALSE);
9327 9343 break;
9328 9344 }
9329 9345
9330 9346 rfs4_update_lock_sequence(lsp);
9331 9347 rfs4_update_lease(lsp->rls_locker->rl_client);
9332 9348
9333 9349 /*
9334 9350 * NFS4 only allows locking on regular files, so
9335 9351 * verify type of object.
9336 9352 */
9337 9353 if (cs->vp->v_type != VREG) {
9338 9354 if (cs->vp->v_type == VDIR)
9339 9355 status = NFS4ERR_ISDIR;
9340 9356 else
9341 9357 status = NFS4ERR_INVAL;
9342 9358 goto out;
9343 9359 }
9344 9360
9345 9361 if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9346 9362 status = NFS4ERR_GRACE;
9347 9363 goto out;
9348 9364 }
9349 9365
9350 9366 status = rfs4_do_lock(lsp, args->locktype,
9351 9367 args->offset, args->length, cs->cr, resop);
9352 9368
9353 9369 out:
9354 9370 *cs->statusp = resp->status = status;
9355 9371
9356 9372 if (status == NFS4_OK)
9357 9373 resp->lock_stateid = lsp->rls_lockid.stateid;
9358 9374
9359 9375 rfs4_update_lock_resp(lsp, resop);
9360 9376
9361 9377 end:
9362 9378 rfs4_sw_exit(&lsp->rls_sw);
9363 9379 rfs4_lo_state_rele(lsp, TRUE);
9364 9380
9365 9381 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9366 9382 LOCKU4res *, resp);
9367 9383 }
9368 9384
9369 9385 /*
9370 9386 * LOCKT is a best effort routine, the client can not be guaranteed that
9371 9387 * the status return is still in effect by the time the reply is received.
9372 9388 * They are numerous race conditions in this routine, but we are not required
9373 9389 * and can not be accurate.
9374 9390 */
9375 9391 /*ARGSUSED*/
9376 9392 void
9377 9393 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9378 9394 struct svc_req *req, struct compound_state *cs)
9379 9395 {
9380 9396 LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9381 9397 LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9382 9398 rfs4_lockowner_t *lo;
9383 9399 rfs4_client_t *cp;
9384 9400 bool_t create = FALSE;
9385 9401 struct flock64 flk;
9386 9402 int error;
9387 9403 int flag = FREAD | FWRITE;
9388 9404 int ltype;
9389 9405 length4 posix_length;
9390 9406 sysid_t sysid;
9391 9407 pid_t pid;
9392 9408
9393 9409 DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9394 9410 LOCKT4args *, args);
9395 9411
9396 9412 if (cs->vp == NULL) {
9397 9413 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9398 9414 goto out;
9399 9415 }
9400 9416
9401 9417 /*
9402 9418 * NFS4 only allows locking on regular files, so
9403 9419 * verify type of object.
9404 9420 */
9405 9421 if (cs->vp->v_type != VREG) {
9406 9422 if (cs->vp->v_type == VDIR)
9407 9423 *cs->statusp = resp->status = NFS4ERR_ISDIR;
9408 9424 else
9409 9425 *cs->statusp = resp->status = NFS4ERR_INVAL;
9410 9426 goto out;
9411 9427 }
9412 9428
9413 9429 /*
9414 9430 * Check out the clientid to ensure the server knows about it
9415 9431 * so that we correctly inform the client of a server reboot.
9416 9432 */
9417 9433 if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9418 9434 == NULL) {
9419 9435 *cs->statusp = resp->status =
9420 9436 rfs4_check_clientid(&args->owner.clientid, 0);
9421 9437 goto out;
9422 9438 }
9423 9439 if (rfs4_lease_expired(cp)) {
9424 9440 rfs4_client_close(cp);
9425 9441 /*
9426 9442 * Protocol doesn't allow returning NFS4ERR_STALE as
9427 9443 * other operations do on this check so STALE_CLIENTID
9428 9444 * is returned instead
9429 9445 */
9430 9446 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9431 9447 goto out;
9432 9448 }
9433 9449
9434 9450 if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9435 9451 *cs->statusp = resp->status = NFS4ERR_GRACE;
9436 9452 rfs4_client_rele(cp);
9437 9453 goto out;
9438 9454 }
9439 9455 rfs4_client_rele(cp);
9440 9456
9441 9457 resp->status = NFS4_OK;
9442 9458
9443 9459 switch (args->locktype) {
9444 9460 case READ_LT:
9445 9461 case READW_LT:
9446 9462 ltype = F_RDLCK;
9447 9463 break;
9448 9464 case WRITE_LT:
9449 9465 case WRITEW_LT:
9450 9466 ltype = F_WRLCK;
9451 9467 break;
9452 9468 }
9453 9469
9454 9470 posix_length = args->length;
9455 9471 /* Check for zero length. To lock to end of file use all ones for V4 */
9456 9472 if (posix_length == 0) {
9457 9473 *cs->statusp = resp->status = NFS4ERR_INVAL;
9458 9474 goto out;
9459 9475 } else if (posix_length == (length4)(~0)) {
9460 9476 posix_length = 0; /* Posix to end of file */
9461 9477 }
9462 9478
9463 9479 /* Find or create a lockowner */
9464 9480 lo = rfs4_findlockowner(&args->owner, &create);
9465 9481
9466 9482 if (lo) {
9467 9483 pid = lo->rl_pid;
9468 9484 if ((resp->status =
9469 9485 rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9470 9486 goto err;
9471 9487 } else {
9472 9488 pid = 0;
9473 9489 sysid = lockt_sysid;
9474 9490 }
9475 9491 retry:
9476 9492 flk.l_type = ltype;
9477 9493 flk.l_whence = 0; /* SEEK_SET */
9478 9494 flk.l_start = args->offset;
9479 9495 flk.l_len = posix_length;
9480 9496 flk.l_sysid = sysid;
9481 9497 flk.l_pid = pid;
9482 9498 flag |= F_REMOTELOCK;
9483 9499
9484 9500 LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9485 9501
9486 9502 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9487 9503 if (flk.l_len < 0 || flk.l_start < 0) {
9488 9504 resp->status = NFS4ERR_INVAL;
9489 9505 goto err;
9490 9506 }
9491 9507 error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9492 9508 NULL, cs->cr, NULL);
9493 9509
9494 9510 /*
9495 9511 * N.B. We map error values to nfsv4 errors. This is differrent
9496 9512 * than puterrno4 routine.
9497 9513 */
9498 9514 switch (error) {
9499 9515 case 0:
9500 9516 if (flk.l_type == F_UNLCK)
9501 9517 resp->status = NFS4_OK;
9502 9518 else {
9503 9519 if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9504 9520 goto retry;
9505 9521 resp->status = NFS4ERR_DENIED;
9506 9522 }
9507 9523 break;
9508 9524 case EOVERFLOW:
9509 9525 resp->status = NFS4ERR_INVAL;
9510 9526 break;
9511 9527 case EINVAL:
9512 9528 resp->status = NFS4ERR_NOTSUPP;
9513 9529 break;
9514 9530 default:
9515 9531 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9516 9532 error);
9517 9533 resp->status = NFS4ERR_SERVERFAULT;
9518 9534 break;
9519 9535 }
9520 9536
9521 9537 err:
9522 9538 if (lo)
9523 9539 rfs4_lockowner_rele(lo);
9524 9540 *cs->statusp = resp->status;
9525 9541 out:
9526 9542 DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9527 9543 LOCKT4res *, resp);
9528 9544 }
9529 9545
9530 9546 int
9531 9547 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9532 9548 {
9533 9549 int err;
9534 9550 int cmd;
9535 9551 vnode_t *vp;
9536 9552 struct shrlock shr;
9537 9553 struct shr_locowner shr_loco;
9538 9554 int fflags = 0;
9539 9555
9540 9556 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9541 9557 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9542 9558
9543 9559 if (sp->rs_closed)
9544 9560 return (NFS4ERR_OLD_STATEID);
9545 9561
9546 9562 vp = sp->rs_finfo->rf_vp;
9547 9563 ASSERT(vp);
9548 9564
9549 9565 shr.s_access = shr.s_deny = 0;
9550 9566
9551 9567 if (access & OPEN4_SHARE_ACCESS_READ) {
9552 9568 fflags |= FREAD;
9553 9569 shr.s_access |= F_RDACC;
9554 9570 }
9555 9571 if (access & OPEN4_SHARE_ACCESS_WRITE) {
9556 9572 fflags |= FWRITE;
9557 9573 shr.s_access |= F_WRACC;
9558 9574 }
9559 9575 ASSERT(shr.s_access);
9560 9576
9561 9577 if (deny & OPEN4_SHARE_DENY_READ)
9562 9578 shr.s_deny |= F_RDDNY;
9563 9579 if (deny & OPEN4_SHARE_DENY_WRITE)
9564 9580 shr.s_deny |= F_WRDNY;
9565 9581
9566 9582 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9567 9583 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9568 9584 shr_loco.sl_pid = shr.s_pid;
9569 9585 shr_loco.sl_id = shr.s_sysid;
9570 9586 shr.s_owner = (caddr_t)&shr_loco;
9571 9587 shr.s_own_len = sizeof (shr_loco);
9572 9588
9573 9589 cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9574 9590
9575 9591 err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9576 9592 if (err != 0) {
9577 9593 if (err == EAGAIN)
9578 9594 err = NFS4ERR_SHARE_DENIED;
9579 9595 else
9580 9596 err = puterrno4(err);
9581 9597 return (err);
9582 9598 }
9583 9599
9584 9600 sp->rs_share_access |= access;
9585 9601 sp->rs_share_deny |= deny;
9586 9602
9587 9603 return (0);
9588 9604 }
9589 9605
9590 9606 int
9591 9607 rfs4_unshare(rfs4_state_t *sp)
9592 9608 {
9593 9609 int err;
9594 9610 struct shrlock shr;
9595 9611 struct shr_locowner shr_loco;
9596 9612
9597 9613 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9598 9614
9599 9615 if (sp->rs_closed || sp->rs_share_access == 0)
9600 9616 return (0);
9601 9617
9602 9618 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9603 9619 ASSERT(sp->rs_finfo->rf_vp);
9604 9620
9605 9621 shr.s_access = shr.s_deny = 0;
9606 9622 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9607 9623 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9608 9624 shr_loco.sl_pid = shr.s_pid;
9609 9625 shr_loco.sl_id = shr.s_sysid;
9610 9626 shr.s_owner = (caddr_t)&shr_loco;
9611 9627 shr.s_own_len = sizeof (shr_loco);
9612 9628
9613 9629 err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9614 9630 NULL);
9615 9631 if (err != 0) {
9616 9632 err = puterrno4(err);
9617 9633 return (err);
9618 9634 }
9619 9635
9620 9636 sp->rs_share_access = 0;
9621 9637 sp->rs_share_deny = 0;
9622 9638
9623 9639 return (0);
9624 9640
9625 9641 }
9626 9642
9627 9643 static int
9628 9644 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9629 9645 {
9630 9646 struct clist *wcl;
9631 9647 count4 count = rok->data_len;
9632 9648 int wlist_len;
9633 9649
9634 9650 wcl = args->wlist;
9635 9651 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9636 9652 return (FALSE);
9637 9653 }
9638 9654 wcl = args->wlist;
9639 9655 rok->wlist_len = wlist_len;
9640 9656 rok->wlist = wcl;
9641 9657 return (TRUE);
9642 9658 }
9643 9659
9644 9660 /* tunable: when non-zero, vn_is_nfs_reparse() returns B_FALSE, disabling server referrals */
9645 9661 int rfs4_no_referrals = 0;
9646 9662
9647 9663 /*
9648 9664 * Find an NFS record in reparse point data.
9649 9665 * Returns 0 for success and <0 or an errno value on failure.
9650 9666 */
9651 9667 int
9652 9668 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9653 9669 {
9654 9670 int err;
9655 9671 char *stype, *val;
9656 9672 nvlist_t *nvl;
9657 9673 nvpair_t *curr;
9658 9674
9659 9675 if ((nvl = reparse_init()) == NULL)
9660 9676 return (-1);
9661 9677
9662 9678 if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9663 9679 reparse_free(nvl);
9664 9680 return (err);
9665 9681 }
9666 9682
9667 9683 curr = NULL;
9668 9684 while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9669 9685 if ((stype = nvpair_name(curr)) == NULL) {
9670 9686 reparse_free(nvl);
9671 9687 return (-2);
9672 9688 }
9673 9689 if (strncasecmp(stype, "NFS", 3) == 0)
9674 9690 break;
9675 9691 }
9676 9692
9677 9693 if ((curr == NULL) ||
9678 9694 (nvpair_value_string(curr, &val))) {
9679 9695 reparse_free(nvl);
9680 9696 return (-3);
9681 9697 }
9682 9698 *nvlp = nvl;
9683 9699 *svcp = stype;
9684 9700 *datap = val;
9685 9701 return (0);
9686 9702 }
9687 9703
9688 9704 int
9689 9705 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9690 9706 {
9691 9707 nvlist_t *nvl;
9692 9708 char *s, *d;
9693 9709
9694 9710 if (rfs4_no_referrals != 0)
9695 9711 return (B_FALSE);
9696 9712
9697 9713 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9698 9714 return (B_FALSE);
9699 9715
9700 9716 if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9701 9717 return (B_FALSE);
9702 9718
9703 9719 reparse_free(nvl);
9704 9720
9705 9721 return (B_TRUE);
9706 9722 }
9707 9723
9708 9724 /*
9709 9725 * There is a user-level copy of this routine in ref_subr.c.
9710 9726 * Changes should be kept in sync.
9711 9727 */
9712 9728 static int
9713 9729 nfs4_create_components(char *path, component4 *comp4)
9714 9730 {
9715 9731 int slen, plen, ncomp;
9716 9732 char *ori_path, *nxtc, buf[MAXNAMELEN];
9717 9733
9718 9734 if (path == NULL)
9719 9735 return (0);
9720 9736
9721 9737 plen = strlen(path) + 1; /* include the terminator */
9722 9738 ori_path = path;
9723 9739 ncomp = 0;
9724 9740
9725 9741 /* count number of components in the path */
9726 9742 for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9727 9743 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9728 9744 if ((slen = nxtc - path) == 0) {
9729 9745 path = nxtc + 1;
9730 9746 continue;
9731 9747 }
9732 9748
9733 9749 if (comp4 != NULL) {
9734 9750 bcopy(path, buf, slen);
9735 9751 buf[slen] = '\0';
9736 9752 (void) str_to_utf8(buf, &comp4[ncomp]);
9737 9753 }
9738 9754
9739 9755 ncomp++; /* 1 valid component */
9740 9756 path = nxtc + 1;
9741 9757 }
9742 9758 if (*nxtc == '\0' || *nxtc == '\n')
9743 9759 break;
9744 9760 }
9745 9761
9746 9762 return (ncomp);
9747 9763 }
9748 9764
9749 9765 /*
9750 9766 * There is a user-level copy of this routine in ref_subr.c.
9751 9767 * Changes should be kept in sync.
9752 9768 */
9753 9769 static int
9754 9770 make_pathname4(char *path, pathname4 *pathname)
9755 9771 {
9756 9772 int ncomp;
9757 9773 component4 *comp4;
9758 9774
9759 9775 if (pathname == NULL)
9760 9776 return (0);
9761 9777
9762 9778 if (path == NULL) {
9763 9779 pathname->pathname4_val = NULL;
9764 9780 pathname->pathname4_len = 0;
9765 9781 return (0);
9766 9782 }
9767 9783
9768 9784 /* count number of components to alloc buffer */
9769 9785 if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
9770 9786 pathname->pathname4_val = NULL;
9771 9787 pathname->pathname4_len = 0;
9772 9788 return (0);
9773 9789 }
9774 9790 comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
9775 9791
9776 9792 /* copy components into allocated buffer */
9777 9793 ncomp = nfs4_create_components(path, comp4);
9778 9794
9779 9795 pathname->pathname4_val = comp4;
9780 9796 pathname->pathname4_len = ncomp;
9781 9797
9782 9798 return (ncomp);
9783 9799 }
9784 9800
9785 9801 #define xdr_fs_locations4 xdr_fattr4_fs_locations
9786 9802
9787 9803 fs_locations4 *
9788 9804 fetch_referral(vnode_t *vp, cred_t *cr)
9789 9805 {
9790 9806 nvlist_t *nvl;
9791 9807 char *stype, *sdata;
9792 9808 fs_locations4 *result;
9793 9809 char buf[1024];
9794 9810 size_t bufsize;
9795 9811 XDR xdr;
9796 9812 int err;
9797 9813
9798 9814 /*
9799 9815 * Check attrs to ensure it's a reparse point
9800 9816 */
9801 9817 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9802 9818 return (NULL);
9803 9819
9804 9820 /*
9805 9821 * Look for an NFS record and get the type and data
9806 9822 */
9807 9823 if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
9808 9824 return (NULL);
9809 9825
9810 9826 /*
9811 9827 * With the type and data, upcall to get the referral
9812 9828 */
9813 9829 bufsize = sizeof (buf);
9814 9830 bzero(buf, sizeof (buf));
9815 9831 err = reparse_kderef((const char *)stype, (const char *)sdata,
9816 9832 buf, &bufsize);
9817 9833 reparse_free(nvl);
9818 9834
9819 9835 DTRACE_PROBE4(nfs4serv__func__referral__upcall,
9820 9836 char *, stype, char *, sdata, char *, buf, int, err);
9821 9837 if (err) {
9822 9838 cmn_err(CE_NOTE,
9823 9839 "reparsed daemon not running: unable to get referral (%d)",
9824 9840 err);
9825 9841 return (NULL);
9826 9842 }
9827 9843
9828 9844 /*
9829 9845 * We get an XDR'ed record back from the kderef call
9830 9846 */
9831 9847 xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
9832 9848 result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
9833 9849 err = xdr_fs_locations4(&xdr, result);
9834 9850 XDR_DESTROY(&xdr);
9835 9851 if (err != TRUE) {
9836 9852 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
9837 9853 int, err);
9838 9854 return (NULL);
9839 9855 }
9840 9856
9841 9857 /*
9842 9858 * Look at path to recover fs_root, ignoring the leading '/'
9843 9859 */
9844 9860 (void) make_pathname4(vp->v_path, &result->fs_root);
9845 9861
9846 9862 return (result);
9847 9863 }
9848 9864
9849 9865 char *
9850 9866 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
9851 9867 {
9852 9868 fs_locations4 *fsl;
9853 9869 fs_location4 *fs;
9854 9870 char *server, *path, *symbuf;
9855 9871 static char *prefix = "/net/";
9856 9872 int i, size, npaths;
9857 9873 uint_t len;
9858 9874
9859 9875 /* Get the referral */
9860 9876 if ((fsl = fetch_referral(vp, cr)) == NULL)
9861 9877 return (NULL);
9862 9878
9863 9879 /* Deal with only the first location and first server */
9864 9880 fs = &fsl->locations_val[0];
9865 9881 server = utf8_to_str(&fs->server_val[0], &len, NULL);
9866 9882 if (server == NULL) {
9867 9883 rfs4_free_fs_locations4(fsl);
9868 9884 kmem_free(fsl, sizeof (fs_locations4));
9869 9885 return (NULL);
9870 9886 }
9871 9887
9872 9888 /* Figure out size for "/net/" + host + /path/path/path + NULL */
9873 9889 size = strlen(prefix) + len;
9874 9890 for (i = 0; i < fs->rootpath.pathname4_len; i++)
9875 9891 size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
9876 9892
9877 9893 /* Allocate the symlink buffer and fill it */
9878 9894 symbuf = kmem_zalloc(size, KM_SLEEP);
9879 9895 (void) strcat(symbuf, prefix);
9880 9896 (void) strcat(symbuf, server);
9881 9897 kmem_free(server, len);
9882 9898
9883 9899 npaths = 0;
9884 9900 for (i = 0; i < fs->rootpath.pathname4_len; i++) {
9885 9901 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
9886 9902 if (path == NULL)
9887 9903 continue;
9888 9904 (void) strcat(symbuf, "/");
9889 9905 (void) strcat(symbuf, path);
9890 9906 npaths++;
9891 9907 kmem_free(path, len);
9892 9908 }
9893 9909
9894 9910 rfs4_free_fs_locations4(fsl);
9895 9911 kmem_free(fsl, sizeof (fs_locations4));
9896 9912
9897 9913 if (strsz != NULL)
9898 9914 *strsz = size;
9899 9915 return (symbuf);
9900 9916 }
9901 9917
9902 9918 /*
9903 9919 * Check to see if we have a downrev Solaris client, so that we
9904 9920 * can send it a symlink instead of a referral.
9905 9921 */
9906 9922 int
9907 9923 client_is_downrev(struct svc_req *req)
9908 9924 {
9909 9925 struct sockaddr *ca;
9910 9926 rfs4_clntip_t *ci;
9911 9927 bool_t create = FALSE;
9912 9928 int is_downrev;
9913 9929
9914 9930 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9915 9931 ASSERT(ca);
9916 9932 ci = rfs4_find_clntip(ca, &create);
9917 9933 if (ci == NULL)
9918 9934 return (0);
9919 9935 is_downrev = ci->ri_no_referrals;
9920 9936 rfs4_dbe_rele(ci->ri_dbe);
9921 9937 return (is_downrev);
9922 9938 }
9923 9939
9924 9940 /*
9925 9941 * Do the main work of handling HA-NFSv4 Resource Group failover on
9926 9942 * Sun Cluster.
9927 9943 * We need to detect whether any RG admin paths have been added or removed,
9928 9944 * and adjust resources accordingly.
9929 9945 * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
9930 9946 * order to scale, the list and array of paths need to be held in more
9931 9947 * suitable data structures.
 *
 * Inputs: the circular nsrv4->dss_pathlist of currently served paths, and
 * the rfs4_dss_newpaths[] array (rfs4_dss_numnewpaths entries) passed in
 * from nfsd.
 *
 * Pass 1 unlinks and frees every served path (other than NFS4_DSS_VAR_DIR)
 * that is missing from the new array, clearing its slot in the owning
 * server instance's dss_paths[].  Pass 2 collects the new paths that are
 * not yet served; if there are any, a new server instance is created for
 * them with a grace period, their stable-storage state is read in, and all
 * active grace periods are reset.
9932 9948 */
9933 9949 static void
9934 9950 hanfsv4_failover(nfs4_srv_t *nsrv4)
9935 9951 {
9936 9952 int i, start_grace, numadded_paths = 0;
9937 9953 char **added_paths = NULL;
9938 9954 rfs4_dss_path_t *dss_path;
9939 9955
9940 9956 /*
9941 9957 * Note: currently, dss_pathlist cannot be NULL, since
9942 9958 * it will always include an entry for NFS4_DSS_VAR_DIR. If we
9943 9959 * make the latter dynamically specified too, the following will
9944 9960 * need to be adjusted.
9945 9961 */
9946 9962
9947 9963 /*
9948 9964 * First, look for removed paths: RGs that have been failed-over
9949 9965 * away from this node.
9950 9966 * Walk the "currently-serving" dss_pathlist and, for each
9951 9967 * path, check if it is on the "passed-in" rfs4_dss_newpaths array
9952 9968 * from nfsd. If not, that RG path has been removed.
9953 9969 *
9954 9970 * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
9955 9971 * any duplicates.
9956 9972 */
9957 9973 dss_path = nsrv4->dss_pathlist;
9958 9974 do {
9959 9975 int found = 0;
9960 9976 char *path = dss_path->path;
9961 9977
9962 9978 /* used only for non-HA so may not be removed */
9963 9979 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
/* "continue" in a do/while jumps to the loop test, so advance first */
9964 9980 dss_path = dss_path->next;
9965 9981 continue;
9966 9982 }
9967 9983
9968 9984 for (i = 0; i < rfs4_dss_numnewpaths; i++) {
9969 9985 int cmpret;
9970 9986 char *newpath = rfs4_dss_newpaths[i];
9971 9987
9972 9988 /*
9973 9989 * Since nfsd has sorted rfs4_dss_newpaths for us,
9974 9990 * once the return from strcmp is negative we know
9975 9991 * we've passed the point where "path" should be,
9976 9992 * and can stop searching: "path" has been removed.
9977 9993 */
9978 9994 cmpret = strcmp(path, newpath);
9979 9995 if (cmpret < 0)
9980 9996 break;
9981 9997 if (cmpret == 0) {
9982 9998 found = 1;
9983 9999 break;
9984 10000 }
9985 10001 }
9986 10002
9987 10003 if (found == 0) {
/* capture everything needed from the entry before it is freed below */
9988 10004 unsigned index = dss_path->index;
9989 10005 rfs4_servinst_t *sip = dss_path->sip;
9990 10006 rfs4_dss_path_t *path_next = dss_path->next;
9991 10007
9992 10008 /*
9993 10009 * This path has been removed.
9994 10010 * We must clear out the servinst reference to
9995 10011 * it, since it's now owned by another
9996 10012 * node: we should not attempt to touch it.
9997 10013 */
9998 10014 ASSERT(dss_path == sip->dss_paths[index]);
9999 10015 sip->dss_paths[index] = NULL;
10000 10016
10001 10017 /* remove from "currently-serving" list, and destroy */
/*
 * NOTE(review): nsrv4->dss_pathlist is not updated here; if the entry
 * being removed could ever be the list head, the loop test below would
 * compare against a freed pointer -- confirm the head is never removed.
 */
10002 10018 remque(dss_path);
10003 10019 /* allow for NUL */
10004 10020 kmem_free(dss_path->path, strlen(dss_path->path) + 1);
10005 10021 kmem_free(dss_path, sizeof (rfs4_dss_path_t));
10006 10022
10007 10023 dss_path = path_next;
10008 10024 } else {
10009 10025 /* path was found; not removed */
10010 10026 dss_path = dss_path->next;
10011 10027 }
10012 10028 } while (dss_path != nsrv4->dss_pathlist);
10013 10029
10014 10030 /*
10015 10031 * Now, look for added paths: RGs that have been failed-over
10016 10032 * to this node.
10017 10033 * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
10018 10034 * for each path, check if it is on the "currently-serving"
10019 10035 * dss_pathlist. If not, that RG path has been added.
10020 10036 *
10021 10037 * Note: we don't do duplicate detection here; nfsd does that for us.
10022 10038 *
10023 10039 * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
10024 10040 * an upper bound for the size needed for added_paths[numadded_paths].
10025 10041 */
10026 10042
10027 10043 /* probably more space than we need, but guaranteed to be enough */
10028 10044 if (rfs4_dss_numnewpaths > 0) {
10029 10045 size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
10030 10046 added_paths = kmem_zalloc(sz, KM_SLEEP);
10031 10047 }
10032 10048
10033 10049 /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
10034 10050 for (i = 0; i < rfs4_dss_numnewpaths; i++) {
10035 10051 int found = 0;
10036 10052 char *newpath = rfs4_dss_newpaths[i];
10037 10053
10038 10054 dss_path = nsrv4->dss_pathlist;
10039 10055 do {
10040 10056 char *path = dss_path->path;
10041 10057
10042 10058 /* used only for non-HA */
10043 10059 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
10044 10060 dss_path = dss_path->next;
10045 10061 continue;
10046 10062 }
10047 10063
/*
 * NOTE(review): this is a prefix match bounded by strlen(path), unlike
 * the exact strcmp() used in the removal pass; a served "/a/rg1" would
 * also match a new "/a/rg10" -- confirm this is intended.
 */
10048 10064 if (strncmp(path, newpath, strlen(path)) == 0) {
10049 10065 found = 1;
10050 10066 break;
10051 10067 }
10052 10068
10053 10069 dss_path = dss_path->next;
10054 10070 } while (dss_path != nsrv4->dss_pathlist);
10055 10071
10056 10072 if (found == 0) {
/* borrowed pointer into rfs4_dss_newpaths[]; only the array is freed below */
10057 10073 added_paths[numadded_paths] = newpath;
10058 10074 numadded_paths++;
10059 10075 }
10060 10076 }
10061 10077
10062 10078 /* did we find any added paths? */
10063 10079 if (numadded_paths > 0) {
10064 10080
10065 10081 /* create a new server instance, and start its grace period */
10066 10082 start_grace = 1;
10067 10083 /* CSTYLED */
10068 10084 rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths);
10069 10085
10070 10086 /* read in the stable storage state from these paths */
10071 10087 rfs4_dss_readstate(nsrv4, numadded_paths, added_paths);
10072 10088
10073 10089 /*
10074 10090 * Multiple failovers during a grace period will cause
10075 10091 * clients of the same resource group to be partitioned
10076 10092 * into different server instances, with different
10077 10093 * grace periods. Since clients of the same resource
10078 10094 * group must be subject to the same grace period,
10079 10095 * we need to reset all currently active grace periods.
10080 10096 */
10081 10097 rfs4_grace_reset_all(nsrv4);
10082 10098 }
10083 10099
/* same size expression as the allocation above */
10084 10100 if (rfs4_dss_numnewpaths > 0)
10085 10101 kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
10086 10102 }
|
↓ open down ↓ |
9217 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX