1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * Copyright 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T.
28 * All rights reserved.
29 */
30
31 /*
32 * Copyright 2018 Nexenta Systems, Inc.
33 */
34
35 #include <sys/types.h>
36 #include <sys/param.h>
37 #include <sys/time.h>
38 #include <sys/vfs.h>
39 #include <sys/vnode.h>
40 #include <sys/socket.h>
41 #include <sys/errno.h>
42 #include <sys/uio.h>
43 #include <sys/proc.h>
44 #include <sys/user.h>
45 #include <sys/file.h>
46 #include <sys/tiuser.h>
47 #include <sys/kmem.h>
48 #include <sys/pathname.h>
49 #include <sys/debug.h>
50 #include <sys/vtrace.h>
51 #include <sys/cmn_err.h>
52 #include <sys/acl.h>
53 #include <sys/utsname.h>
54 #include <sys/sdt.h>
55 #include <netinet/in.h>
56 #include <sys/avl.h>
57
58 #include <rpc/types.h>
59 #include <rpc/auth.h>
60 #include <rpc/svc.h>
61
62 #include <nfs/nfs.h>
63 #include <nfs/export.h>
64 #include <nfs/nfssys.h>
65 #include <nfs/nfs_clnt.h>
66 #include <nfs/nfs_acl.h>
67 #include <nfs/nfs_log.h>
68 #include <nfs/lm.h>
69 #include <sys/sunddi.h>
70
/*
 * exi_id support
 *
 * exi_id_next		The next exi_id available.
 * exi_id_overflow	The exi_id_next already overflowed, so we should
 *			thoroughly check for duplicates.
 * exi_id_tree		AVL tree indexed by exi_id.
 * nfs_exi_id_lock	Lock to protect the export ID list
 *
 * All exi_id_next, exi_id_overflow, and exi_id_tree are protected by
 * nfs_exi_id_lock.
 *
 * exi_id_next and exi_id_overflow are private to this file; the tree and
 * its lock have external linkage.
 */
static int exi_id_next;
static bool_t exi_id_overflow;
avl_tree_t exi_id_tree;
kmutex_t nfs_exi_id_lock;

/* Forward declarations of file-local export helpers. */
static int unexport(nfs_export_t *, exportinfo_t *);
static void exportfree(exportinfo_t *);
static int loadindex(exportdata_t *);

/* Routines defined outside of this file. */
extern void nfsauth_cache_free(exportinfo_t *);
extern int sec_svc_loadrootnames(int, int, caddr_t **, model_t);
extern void sec_svc_freerootnames(int, int, caddr_t *);

/* Forward declarations of the security flavor (secinfo) list helpers. */
static int build_seclist_nodups(exportdata_t *, secinfo_t *, int);
static void srv_secinfo_add(secinfo_t **, int *, secinfo_t *, int, int);
static void srv_secinfo_remove(secinfo_t **, int *, secinfo_t *, int);
static void srv_secinfo_treeclimb(nfs_export_t *, exportinfo_t *,
    secinfo_t *, int, bool_t);

/* Helpers that only exist when volatile file handle testing is built in. */
#ifdef VOLATILE_FH_TEST
static struct ex_vol_rename *find_volrnm_fh(exportinfo_t *, nfs_fh4 *);
static uint32_t find_volrnm_fh_id(exportinfo_t *, nfs_fh4 *);
static void free_volrnm_list(exportinfo_t *);
#endif /* VOLATILE_FH_TEST */

fhandle_t nullfh2;	/* for comparing V2 filehandles */
109
/*
 * macro for static dtrace probes to trace server namespace ref count mods.
 *
 * Fires the nfss__i__nmspc__secref probe with four arguments: the value
 * passed as "seclist" (typed as struct secinfo *), a string "tag" naming
 * the operation, the flavor number and the reference count after the
 * modification (both cast to int).
 */
#define SECREF_TRACE(seclist, tag, flav, aftcnt) \
	DTRACE_PROBE4(nfss__i__nmspc__secref, struct secinfo *, (seclist), \
	    char *, (tag), int, (int)(flav), int, (int)(aftcnt))


/*
 * Map an (fsid, fid) pair to a bucket index: the nfs_fhhash() value masked
 * with (EXPTABLESIZE - 1).
 * NOTE(review): the mask presumes EXPTABLESIZE is a power of two - confirm
 * against its definition.
 */
#define exptablehash(fsid, fid) (nfs_fhhash((fsid), (fid)) & (EXPTABLESIZE - 1))
119
120 extern nfs_export_t *
121 nfs_get_export(void)
122 {
123 nfs_globals_t *ng = zone_getspecific(nfssrv_zone_key, curzone);
124 nfs_export_t *ne = ng->nfs_export;
125 ASSERT(ne != NULL);
126 return (ne);
127 }
128
/*
 * Fold "len" bytes starting at "data" into a single byte by XOR-ing them
 * together.  A zero-length buffer hashes to 0.
 */
static uint8_t
xor_hash(uint8_t *data, int len)
{
	uint8_t acc;

	for (acc = 0; len != 0; len--) {
		acc ^= *data;
		data++;
	}

	return (acc);
}
139
140 /*
141 * File handle hash function, XOR over all bytes in fsid and fid.
142 */
143 static unsigned
144 nfs_fhhash(fsid_t *fsid, fid_t *fid)
145 {
146 int len;
147 uint8_t h;
148
149 h = xor_hash((uint8_t *)fsid, sizeof (fsid_t));
150
151 /*
152 * Sanity check the length before using it
153 * blindly in case the client trashed it.
154 */
155 len = fid->fid_len > NFS_FH4MAXDATA ? 0 : fid->fid_len;
156 h ^= xor_hash((uint8_t *)fid->fid_data, len);
157
158 return ((unsigned)h);
159 }
160
161 /*
162 * Free the memory allocated within a secinfo entry.
163 */
164 void
165 srv_secinfo_entry_free(struct secinfo *secp)
166 {
167 if (secp->s_rootcnt > 0 && secp->s_rootnames != NULL) {
168 sec_svc_freerootnames(secp->s_secinfo.sc_rpcnum,
169 secp->s_rootcnt, secp->s_rootnames);
170 secp->s_rootcnt = 0;
171 }
172
173 if ((secp->s_secinfo.sc_rpcnum == RPCSEC_GSS) &&
174 (secp->s_secinfo.sc_gss_mech_type)) {
175 kmem_free(secp->s_secinfo.sc_gss_mech_type->elements,
176 secp->s_secinfo.sc_gss_mech_type->length);
177 kmem_free(secp->s_secinfo.sc_gss_mech_type,
178 sizeof (rpc_gss_OID_desc));
179 secp->s_secinfo.sc_gss_mech_type = NULL;
180 }
181 }
182
183 /*
184 * Free a list of secinfo allocated in the exportdata structure.
185 */
186 void
187 srv_secinfo_list_free(struct secinfo *secinfo, int cnt)
188 {
189 int i;
190
191 if (cnt == 0)
192 return;
193
194 for (i = 0; i < cnt; i++)
195 srv_secinfo_entry_free(&secinfo[i]);
196
197 kmem_free(secinfo, cnt * sizeof (struct secinfo));
198 }
199
200 /*
201 * Allocate and copy a secinfo data from "from" to "to".
202 *
203 * This routine is used by srv_secinfo_add() to add a new flavor to an
204 * ancestor's export node. The rootnames are not copied because the
205 * allowable rootname access only applies to the explicit exported node,
206 * not its ancestor's.
207 *
208 * "to" should have already been allocated and zeroed before calling
209 * this routine.
210 *
211 * This routine is used under the protection of exported_lock (RW_WRITER).
212 */
213 void
214 srv_secinfo_copy(struct secinfo *from, struct secinfo *to)
215 {
216 to->s_secinfo.sc_nfsnum = from->s_secinfo.sc_nfsnum;
217 to->s_secinfo.sc_rpcnum = from->s_secinfo.sc_rpcnum;
218
219 if (from->s_secinfo.sc_rpcnum == RPCSEC_GSS) {
220 to->s_secinfo.sc_service = from->s_secinfo.sc_service;
221 bcopy(from->s_secinfo.sc_name, to->s_secinfo.sc_name,
222 strlen(from->s_secinfo.sc_name));
223 bcopy(from->s_secinfo.sc_gss_mech, to->s_secinfo.sc_gss_mech,
224 strlen(from->s_secinfo.sc_gss_mech));
225
226 /* copy mechanism oid */
227 to->s_secinfo.sc_gss_mech_type =
228 kmem_alloc(sizeof (rpc_gss_OID_desc), KM_SLEEP);
229 to->s_secinfo.sc_gss_mech_type->length =
230 from->s_secinfo.sc_gss_mech_type->length;
231 to->s_secinfo.sc_gss_mech_type->elements =
232 kmem_alloc(from->s_secinfo.sc_gss_mech_type->length,
233 KM_SLEEP);
234 bcopy(from->s_secinfo.sc_gss_mech_type->elements,
235 to->s_secinfo.sc_gss_mech_type->elements,
236 from->s_secinfo.sc_gss_mech_type->length);
237 }
238
239 to->s_refcnt = from->s_refcnt;
240 to->s_window = from->s_window;
241 /* no need to copy the mode bits - s_flags */
242 }
243
244 /*
245 * Create a secinfo array without duplicates. The condensed
246 * flavor list is used to propagate flavor ref counts to an
247 * export's ancestor pseudonodes.
248 */
249 static int
250 build_seclist_nodups(exportdata_t *exd, secinfo_t *nodups, int exponly)
251 {
252 int ccnt, c;
253 int ncnt, n;
254 struct secinfo *cursec;
255
256 ncnt = 0;
257 ccnt = exd->ex_seccnt;
258 cursec = exd->ex_secinfo;
259
260 for (c = 0; c < ccnt; c++) {
261
262 if (exponly && ! SEC_REF_EXPORTED(&cursec[c]))
263 continue;
264
265 for (n = 0; n < ncnt; n++) {
266 if (nodups[n].s_secinfo.sc_nfsnum ==
267 cursec[c].s_secinfo.sc_nfsnum)
268 break;
269 }
270
271 /*
272 * The structure copy below also copys ptrs embedded
273 * within struct secinfo. The ptrs are copied but
274 * they are never freed from the nodups array. If
275 * an ancestor's secinfo array doesn't contain one
276 * of the nodups flavors, then the entry is properly
277 * copied into the ancestor's secinfo array.
278 * (see srv_secinfo_copy)
279 */
280 if (n == ncnt) {
281 nodups[n] = cursec[c];
282 ncnt++;
283 }
284 }
285 return (ncnt);
286 }
287
288 /*
289 * Add the new security flavors from newdata to the current list, pcursec.
290 * Upon return, *pcursec has the newly merged secinfo list.
291 *
292 * There should be at least 1 secinfo entry in newsec.
293 *
294 * This routine is used under the protection of exported_lock (RW_WRITER).
295 */
296 static void
297 srv_secinfo_add(secinfo_t **pcursec, int *pcurcnt, secinfo_t *newsec,
298 int newcnt, int is_pseudo)
299 {
300 int ccnt, c; /* sec count in current data - curdata */
301 int n; /* index for newsec - newsecinfo */
302 int tcnt; /* total sec count after merge */
303 int mcnt; /* total sec count after merge */
304 struct secinfo *msec; /* merged secinfo list */
305 struct secinfo *cursec;
306
307 cursec = *pcursec;
308 ccnt = *pcurcnt;
309
310 ASSERT(newcnt > 0);
311 tcnt = ccnt + newcnt;
312
313 for (n = 0; n < newcnt; n++) {
314 for (c = 0; c < ccnt; c++) {
315 if (newsec[n].s_secinfo.sc_nfsnum ==
316 cursec[c].s_secinfo.sc_nfsnum) {
317 cursec[c].s_refcnt += newsec[n].s_refcnt;
318 SECREF_TRACE(cursec, "add_ref",
319 cursec[c].s_secinfo.sc_nfsnum,
320 cursec[c].s_refcnt);
321 tcnt--;
322 break;
323 }
324 }
325 }
326
327 if (tcnt == ccnt)
328 return; /* no change; no new flavors */
329
330 msec = kmem_zalloc(tcnt * sizeof (struct secinfo), KM_SLEEP);
331
332 /* move current secinfo list data to the new list */
333 for (c = 0; c < ccnt; c++)
334 msec[c] = cursec[c];
335
336 /* Add the flavor that's not in the current data */
337 mcnt = ccnt;
338 for (n = 0; n < newcnt; n++) {
339 for (c = 0; c < ccnt; c++) {
340 if (newsec[n].s_secinfo.sc_nfsnum ==
341 cursec[c].s_secinfo.sc_nfsnum)
342 break;
343 }
344
345 /* This is the one. Add it. */
346 if (c == ccnt) {
347 srv_secinfo_copy(&newsec[n], &msec[mcnt]);
348
349 if (is_pseudo)
350 msec[mcnt].s_flags = M_RO;
351
352 SECREF_TRACE(msec, "new_ref",
353 msec[mcnt].s_secinfo.sc_nfsnum,
354 msec[mcnt].s_refcnt);
355 mcnt++;
356 }
357 }
358
359 ASSERT(mcnt == tcnt);
360
361 /*
362 * Done. Update curdata. Free the old secinfo list in
363 * curdata and return the new sec array info
364 */
365 if (ccnt > 0)
366 kmem_free(cursec, ccnt * sizeof (struct secinfo));
367 *pcurcnt = tcnt;
368 *pcursec = msec;
369 }
370
/*
 * For NFS V4.
 * Remove the security data of the unexported node from its ancestors.
 * Assume there is at least one flavor entry in the current sec list
 * (pcursec).
 *
 * For every explicitly exported flavor in remsec, the matching entry in
 * *pcursec has its reference count decremented by one.  Entries that
 * become invalid (SEC_REF_INVALID) are freed and dropped from the list.
 * On return *pcursec / *pcurcnt describe the resulting list; when no
 * entry survives they are set to NULL / 0.
 *
 * This routine is used under the protection of exported_lock (RW_WRITER).
 *
 * Every element of remsec is an explicitly exported flavor.  If
 * srv_secinfo_remove() is called from an exportfs error path, then
 * the flavor list was derived from the user's share cmdline,
 * and all flavors are explicit.  If it was called from the unshare path,
 * build_seclist_nodups() was called with the exponly flag.
 */
static void
srv_secinfo_remove(secinfo_t **pcursec, int *pcurcnt, secinfo_t *remsec,
    int remcnt)
{
	int ccnt, c;		/* sec count in current data - cursec */
	int r;			/* sec count in removal data - remsec */
	int tcnt, mcnt;		/* total sec count after removing */
	struct secinfo *msec;	/* final secinfo list after removing */
	struct secinfo *cursec;

	cursec = *pcursec;
	ccnt = *pcurcnt;
	tcnt = ccnt;

	/*
	 * Pass 1: drop one reference per removed flavor, in place, and
	 * count how many entries will survive (tcnt).
	 */
	for (r = 0; r < remcnt; r++) {
		/*
		 * At unshare/reshare time, only explicitly shared flavor ref
		 * counts are decremented and propagated to ancestors.
		 * Implicit flavor refs came from shared descendants, and
		 * they must be kept.
		 */
		if (! SEC_REF_EXPORTED(&remsec[r]))
			continue;

		for (c = 0; c < ccnt; c++) {
			if (remsec[r].s_secinfo.sc_nfsnum ==
			    cursec[c].s_secinfo.sc_nfsnum) {

				/*
				 * Decrement secinfo reference count by 1.
				 * If this entry is invalid after decrementing
				 * the count (i.e. count < 1), this entry will
				 * be removed.
				 */
				cursec[c].s_refcnt--;

				SECREF_TRACE(cursec, "del_ref",
				    cursec[c].s_secinfo.sc_nfsnum,
				    cursec[c].s_refcnt);

				ASSERT(cursec[c].s_refcnt >= 0);

				if (SEC_REF_INVALID(&cursec[c]))
					tcnt--;
				break;
			}
		}
	}

	ASSERT(tcnt >= 0);
	if (tcnt == ccnt)
		return;		/* no change; no flavors to remove */

	/* Nothing survives: free every entry and the array in one go. */
	if (tcnt == 0) {
		srv_secinfo_list_free(cursec, ccnt);
		*pcurcnt = 0;
		*pcursec = NULL;
		return;
	}

	msec = kmem_zalloc(tcnt * sizeof (struct secinfo), KM_SLEEP);

	/* walk thru the given secinfo list to remove the flavors */
	mcnt = 0;
	for (c = 0; c < ccnt; c++) {
		if (SEC_REF_INVALID(&cursec[c])) {
			srv_secinfo_entry_free(&cursec[c]);
		} else {
			/* structure copy: embedded pointers move to msec */
			msec[mcnt] = cursec[c];
			mcnt++;
		}
	}

	ASSERT(mcnt == tcnt);
	/*
	 * Done. Update curdata.
	 * Free the existing secinfo list in curdata. All pointers
	 * within the list have either been moved to msec or freed
	 * if it's invalid.
	 */
	kmem_free(*pcursec, ccnt * sizeof (struct secinfo));
	*pcursec = msec;
	*pcurcnt = tcnt;
}
469
470
/*
 * For the reshare case, sec flavor accounting happens in 3 steps:
 * 1) propagate addition of new flavor refs up the ancestor tree
 * 2) transfer flavor refs of descendants to new/reshared exportdata
 * 3) propagate removal of old flavor refs up the ancestor tree
 *
 * srv_secinfo_exp2exp() implements step 2 of a reshare.  At this point,
 * the new flavor list has already been propagated up through the
 * ancestor tree via srv_secinfo_treeclimb().
 *
 * If there is more than 1 export reference to an old flavor (i.e. some
 * of its children shared with this flavor), this flavor information
 * needs to be transferred to the new exportdata struct.  A flavor in
 * the old exportdata has descendant refs when its s_refcnt > 1 or it
 * is implicitly shared (M_SEC4_EXPORTED not set in s_flags).
 *
 * SEC_REF_EXPORTED() is only true when M_SEC4_EXPORTED is set
 * SEC_REF_SELF() is only true when both M_SEC4_EXPORTED is set and s_refcnt==1
 *
 * Transferring descendant flavor refcnts happens in 2 passes:
 * a) flavors used before (oldsecinfo) and after (curdata->ex_secinfo) reshare
 * b) flavors used before but not after reshare
 *
 * curdata is the new (non-pseudo) exportdata whose ex_secinfo / ex_seccnt
 * are updated in place; oldsecinfo / ocnt describe the flavor list the
 * export had before the reshare (ocnt must be > 0).
 *
 * This routine is used under the protection of exported_lock (RW_WRITER).
 */
void
srv_secinfo_exp2exp(exportdata_t *curdata, secinfo_t *oldsecinfo, int ocnt)
{
	int ccnt, c;		/* sec count in current data - curdata */
	int o;			/* sec count in old data - oldsecinfo */
	int tcnt, mcnt;		/* total sec count after the transfer */
	struct secinfo *msec;	/* merged secinfo list */

	ccnt = curdata->ex_seccnt;

	ASSERT(ocnt > 0);
	ASSERT(!(curdata->ex_flags & EX_PSEUDO));

	/*
	 * If the oldsecinfo has flavors with more than 1 reference count
	 * and the flavor is specified in the reshare, transfer the flavor
	 * refs to the new seclist (curdata.ex_secinfo).
	 *
	 * tcnt starts at the worst case (no overlap) and is reduced for
	 * every old flavor that needs no new slot: those referenced only
	 * by the old export itself and those already in the new list.
	 */
	tcnt = ccnt + ocnt;

	for (o = 0; o < ocnt; o++) {

		if (SEC_REF_SELF(&oldsecinfo[o])) {
			tcnt--;
			continue;
		}

		for (c = 0; c < ccnt; c++) {
			if (oldsecinfo[o].s_secinfo.sc_nfsnum ==
			    curdata->ex_secinfo[c].s_secinfo.sc_nfsnum) {

				/*
				 * add old reference to the current
				 * secinfo count
				 */
				curdata->ex_secinfo[c].s_refcnt +=
				    oldsecinfo[o].s_refcnt;

				/*
				 * Delete the old export flavor
				 * reference.  The initial reference
				 * was created during srv_secinfo_add,
				 * and the count is decremented below
				 * to account for the initial reference.
				 */
				if (SEC_REF_EXPORTED(&oldsecinfo[o]))
					curdata->ex_secinfo[c].s_refcnt--;

				/*
				 * NOTE(review): the first probe argument
				 * here is curdata->ex_path, whereas the
				 * probe declares it as struct secinfo * -
				 * confirm this is what DTrace consumers
				 * expect.
				 */
				SECREF_TRACE(curdata->ex_path,
				    "reshare_xfer_common_child_refs",
				    curdata->ex_secinfo[c].s_secinfo.sc_nfsnum,
				    curdata->ex_secinfo[c].s_refcnt);

				ASSERT(curdata->ex_secinfo[c].s_refcnt >= 0);

				tcnt--;
				break;
			}
		}
	}

	if (tcnt == ccnt)
		return;		/* no more transfer to do */

	/*
	 * oldsecinfo has flavors referenced by its children that are not
	 * in the current (new) export flavor list.  Add these flavors.
	 */
	msec = kmem_zalloc(tcnt * sizeof (struct secinfo), KM_SLEEP);

	/* move current secinfo list data to the new list */
	for (c = 0; c < ccnt; c++)
		msec[c] = curdata->ex_secinfo[c];

	/*
	 * Add the flavor that's not in the new export, but still
	 * referenced by its children.
	 */
	mcnt = ccnt;
	for (o = 0; o < ocnt; o++) {
		if (! SEC_REF_SELF(&oldsecinfo[o])) {
			for (c = 0; c < ccnt; c++) {
				if (oldsecinfo[o].s_secinfo.sc_nfsnum ==
				    curdata->ex_secinfo[c].s_secinfo.sc_nfsnum)
					break;
			}

			/*
			 * This is the one. Add it. Decrement the ref count
			 * by 1 if the flavor is an explicitly shared flavor
			 * for the oldsecinfo export node.
			 */
			if (c == ccnt) {
				srv_secinfo_copy(&oldsecinfo[o], &msec[mcnt]);
				if (SEC_REF_EXPORTED(&oldsecinfo[o]))
					msec[mcnt].s_refcnt--;

				SECREF_TRACE(curdata,
				    "reshare_xfer_implicit_child_refs",
				    msec[mcnt].s_secinfo.sc_nfsnum,
				    msec[mcnt].s_refcnt);

				ASSERT(msec[mcnt].s_refcnt >= 0);
				mcnt++;
			}
		}
	}

	ASSERT(mcnt == tcnt);
	/*
	 * Done. Update curdata, free the existing secinfo list in
	 * curdata and set the new value.  Only the old array is freed;
	 * its entries were moved into msec by structure copy above.
	 */
	if (ccnt > 0)
		kmem_free(curdata->ex_secinfo, ccnt * sizeof (struct secinfo));
	curdata->ex_seccnt = tcnt;
	curdata->ex_secinfo = msec;
}
614
615 /*
616 * When unsharing an old export node and the old node becomes a pseudo node,
617 * if there is more than 1 export reference to an old flavor (i.e. some of
618 * its children shared with this flavor), this flavor information needs to
619 * be transferred to the new shared node.
620 *
621 * This routine is used under the protection of exported_lock (RW_WRITER).
622 */
623 void
624 srv_secinfo_exp2pseu(exportdata_t *curdata, exportdata_t *olddata)
625 {
626 int ocnt, o; /* sec count in transfer data - trandata */
627 int tcnt, mcnt; /* total sec count after transfer */
628 struct secinfo *msec; /* merged secinfo list */
629
630 ASSERT(curdata->ex_flags & EX_PSEUDO);
631 ASSERT(curdata->ex_seccnt == 0);
632
633 ocnt = olddata->ex_seccnt;
634
635 /*
636 * If the olddata has flavors with more than 1 reference count,
637 * transfer the information to the curdata.
638 */
639 tcnt = ocnt;
640
641 for (o = 0; o < ocnt; o++) {
642 if (SEC_REF_SELF(&olddata->ex_secinfo[o]))
643 tcnt--;
644 }
645
646 if (tcnt == 0)
647 return; /* no transfer to do */
648
649 msec = kmem_zalloc(tcnt * sizeof (struct secinfo), KM_SLEEP);
650
651 mcnt = 0;
652 for (o = 0; o < ocnt; o++) {
653 if (! SEC_REF_SELF(&olddata->ex_secinfo[o])) {
654
655 /*
656 * Decrement the reference count by 1 if the flavor is
657 * an explicitly shared flavor for the olddata export
658 * node.
659 */
660 srv_secinfo_copy(&olddata->ex_secinfo[o], &msec[mcnt]);
661 msec[mcnt].s_flags = M_RO;
662 if (SEC_REF_EXPORTED(&olddata->ex_secinfo[o]))
663 msec[mcnt].s_refcnt--;
664
665 SECREF_TRACE(curdata, "unshare_morph_pseudo",
666 msec[mcnt].s_secinfo.sc_nfsnum,
667 msec[mcnt].s_refcnt);
668
669 ASSERT(msec[mcnt].s_refcnt >= 0);
670 mcnt++;
671 }
672 }
673
674 ASSERT(mcnt == tcnt);
675 /*
676 * Done. Update curdata.
677 * Free up the existing secinfo list in curdata and
678 * set the new value.
679 */
680 curdata->ex_seccnt = tcnt;
681 curdata->ex_secinfo = msec;
682 }
683
684 /*
685 * Find for given treenode the exportinfo which has its
686 * exp_visible linked on its exi_visible list.
687 *
688 * Note: We could add new pointer either to treenode or
689 * to exp_visible, which will point there directly.
690 * This would buy some speed for some memory.
691 */
692 exportinfo_t *
693 vis2exi(treenode_t *tnode)
694 {
695 exportinfo_t *exi_ret = NULL;
696 #ifdef DEBUG
697 zone_t *zone = NULL;
698 #endif
699
700 for (;;) {
701 tnode = tnode->tree_parent;
702 #ifdef DEBUG
703 if (zone == NULL && tnode->tree_exi != NULL)
704 zone = tnode->tree_exi->exi_zone;
705 #endif
706 if (TREE_ROOT(tnode)) {
707 ASSERT3P(zone, ==, tnode->tree_exi->exi_zone);
708 exi_ret = tnode->tree_exi;
709 break;
710 }
711 }
712
713 ASSERT(exi_ret); /* Every visible should have its home exportinfo */
714 return (exi_ret);
715 }
716
717 /*
718 * For NFS V4.
719 * Add or remove the newly exported or unexported security flavors of the
720 * given exportinfo from its ancestors upto the system root.
721 */
722 void
723 srv_secinfo_treeclimb(nfs_export_t *ne, exportinfo_t *exip, secinfo_t *sec,
724 int seccnt, bool_t isadd)
725 {
726 treenode_t *tnode;
727
728 ASSERT(RW_WRITE_HELD(&ne->exported_lock));
729
730 /*
731 * exi_tree can be null for the zone root
732 * which means we're already at the "top"
733 * and there's nothing more to "climb".
734 */
735 tnode = exip->exi_tree;
736 if (tnode == NULL) {
737 /* Should only happen for... */
738 ASSERT(exip == ne->exi_root);
739 return;
740 }
741
742 if (seccnt == 0)
743 return;
744
745 /*
746 * If flavors are being added and the new export root isn't
747 * also VROOT, its implicitly allowed flavors are inherited from
748 * its pseudonode.
749 * Note - for VROOT exports the implicitly allowed flavors were
750 * transferred from the PSEUDO export in exportfs()
751 */
752 if (isadd && !(exip->exi_vp->v_flag & VROOT) &&
753 !VN_IS_CURZONEROOT(exip->exi_vp) &&
754 tnode->tree_vis->vis_seccnt > 0) {
755 srv_secinfo_add(&exip->exi_export.ex_secinfo,
756 &exip->exi_export.ex_seccnt, tnode->tree_vis->vis_secinfo,
757 tnode->tree_vis->vis_seccnt, FALSE);
758 }
759
760 /*
761 * Move to parent node and propagate sec flavor
762 * to exportinfo and to visible structures.
763 */
764 tnode = tnode->tree_parent;
765
766 while (tnode != NULL) {
767
768 /* If there is exportinfo, update it */
769 if (tnode->tree_exi != NULL) {
770 secinfo_t **pxsec =
771 &tnode->tree_exi->exi_export.ex_secinfo;
772 int *pxcnt = &tnode->tree_exi->exi_export.ex_seccnt;
773 int is_pseudo = PSEUDO(tnode->tree_exi);
774 if (isadd)
775 srv_secinfo_add(pxsec, pxcnt, sec, seccnt,
776 is_pseudo);
777 else
778 srv_secinfo_remove(pxsec, pxcnt, sec, seccnt);
779 }
780
781 /* Update every visible - only root node has no visible */
782 if (tnode->tree_vis != NULL) {
783 secinfo_t **pxsec = &tnode->tree_vis->vis_secinfo;
784 int *pxcnt = &tnode->tree_vis->vis_seccnt;
785 if (isadd)
786 srv_secinfo_add(pxsec, pxcnt, sec, seccnt,
787 FALSE);
788 else
789 srv_secinfo_remove(pxsec, pxcnt, sec, seccnt);
790 }
791 tnode = tnode->tree_parent;
792 }
793 }
794
/*
 * Hash chain maintenance for exportinfo structures.
 *
 * hash_name is a text substitution for either fid_hash or path_hash; it
 * names the member of (exi) holding the bckt/prev/next linkage.
 *
 * Both macros expand to a single statement (do { } while (0)), so they
 * are safe to use as the body of an unbraced if/else.  Invoke them like
 * a function call, followed by a semicolon.  Note that (exi) is evaluated
 * more than once; do not pass an expression with side effects.
 */

/*
 * Remove exi from the chain it is on: fix up the bucket head if exi is
 * the first entry, splice its neighbours together, and clear its bucket
 * pointer.  The prev/next members of exi itself are left as they were.
 */
#define exp_hash_unlink(exi, hash_name) \
	do { \
		if (*(exi)->hash_name.bckt == (exi)) \
			*(exi)->hash_name.bckt = (exi)->hash_name.next; \
		if ((exi)->hash_name.prev) \
			(exi)->hash_name.prev->hash_name.next = \
			    (exi)->hash_name.next; \
		if ((exi)->hash_name.next) \
			(exi)->hash_name.next->hash_name.prev = \
			    (exi)->hash_name.prev; \
		(exi)->hash_name.bckt = NULL; \
	} while (0)

/*
 * Insert exi at the head of the chain rooted at *bucket and remember the
 * bucket in exi so that it can be unlinked later.
 */
#define exp_hash_link(exi, hash_name, bucket) \
	do { \
		(exi)->hash_name.bckt = (bucket); \
		(exi)->hash_name.prev = NULL; \
		(exi)->hash_name.next = *(bucket); \
		if ((exi)->hash_name.next) \
			(exi)->hash_name.next->hash_name.prev = (exi); \
		*(bucket) = (exi); \
	} while (0)
812
813 void
814 export_link(nfs_export_t *ne, exportinfo_t *exi)
815 {
816 exportinfo_t **bckt;
817
818 ASSERT(RW_WRITE_HELD(&ne->exported_lock));
819 ASSERT(exi->exi_zoneid == ne->ne_globals->nfs_zoneid);
820
821 bckt = &ne->exptable[exptablehash(&exi->exi_fsid, &exi->exi_fid)];
822 exp_hash_link(exi, fid_hash, bckt);
823
824 bckt = &ne->exptable_path_hash[pkp_tab_hash(exi->exi_export.ex_path,
825 strlen(exi->exi_export.ex_path))];
826 exp_hash_link(exi, path_hash, bckt);
827 }
828
829 /*
830 * Helper functions for exi_id handling
831 */
832 static int
833 exi_id_compar(const void *v1, const void *v2)
834 {
835 const struct exportinfo *e1 = v1;
836 const struct exportinfo *e2 = v2;
837
838 if (e1->exi_id < e2->exi_id)
839 return (-1);
840 if (e1->exi_id > e2->exi_id)
841 return (1);
842
843 return (0);
844 }
845
846 int
847 exi_id_get_next()
848 {
849 struct exportinfo e;
850 int ret = exi_id_next;
851
852 ASSERT(MUTEX_HELD(&nfs_exi_id_lock));
853
854 do {
855 exi_id_next++;
856 if (exi_id_next == 0)
857 exi_id_overflow = TRUE;
858
859 if (!exi_id_overflow)
860 break;
861
862 if (exi_id_next == ret)
863 cmn_err(CE_PANIC, "exi_id exhausted");
864
865 e.exi_id = exi_id_next;
866 } while (avl_find(&exi_id_tree, &e, NULL) != NULL);
867
868 return (ret);
869 }
870
871 /*
872 * Get the root file handle for this zone.
873 * Called when nfs_svc() starts
874 */
875 int
876 nfs_export_get_rootfh(nfs_globals_t *g)
877 {
878 nfs_export_t *ne = g->nfs_export;
879 int err;
880
881 ne->exi_rootfid.fid_len = MAXFIDSZ;
882 err = vop_fid_pseudo(ne->exi_root->exi_vp, &ne->exi_rootfid);
883 if (err != 0) {
884 ne->exi_rootfid.fid_len = 0;
885 return (err);
886 }
887
888 /* Setup the fhandle template exi_fh */
889 ne->exi_root->exi_fh.fh_fsid = rootdir->v_vfsp->vfs_fsid;
890 ne->exi_root->exi_fh.fh_xlen = ne->exi_rootfid.fid_len;
891 bcopy(ne->exi_rootfid.fid_data, ne->exi_root->exi_fh.fh_xdata,
892 ne->exi_rootfid.fid_len);
893 ne->exi_root->exi_fh.fh_len = sizeof (ne->exi_root->exi_fh.fh_data);
894
895 return (0);
896 }
897
898 void
899 nfs_export_zone_init(nfs_globals_t *ng)
900 {
901 int i;
902 nfs_export_t *ne;
903
904 ne = kmem_zalloc(sizeof (*ne), KM_SLEEP);
905
906 rw_init(&ne->exported_lock, NULL, RW_DEFAULT, NULL);
907
908 ne->ne_globals = ng; /* "up" pointer */
909
910 /*
911 * Allocate the place holder for the public file handle, which
912 * is all zeroes. It is initially set to the root filesystem.
913 */
914 ne->exi_root = kmem_zalloc(sizeof (*ne->exi_root), KM_SLEEP);
915 ne->exi_public = ne->exi_root;
916
917 ne->exi_root->exi_export.ex_flags = EX_PUBLIC;
918 ne->exi_root->exi_export.ex_pathlen = 1; /* length of "/" */
919 ne->exi_root->exi_export.ex_path =
920 kmem_alloc(ne->exi_root->exi_export.ex_pathlen + 1, KM_SLEEP);
921 ne->exi_root->exi_export.ex_path[0] = '/';
922 ne->exi_root->exi_export.ex_path[1] = '\0';
923
924 ne->exi_root->exi_count = 1;
925 mutex_init(&ne->exi_root->exi_lock, NULL, MUTEX_DEFAULT, NULL);
926
927 ASSERT(curzone->zone_id == ng->nfs_zoneid);
928 ne->exi_root->exi_vp = ZONE_ROOTVP();
929 ne->exi_root->exi_zoneid = ng->nfs_zoneid;
930
931 /*
932 * Fill in ne->exi_rootfid later, in nfs_export_get_rootfid
933 * because we can't correctly return errors here.
934 */
935
936 /* Initialize auth cache and auth cache lock */
937 for (i = 0; i < AUTH_TABLESIZE; i++) {
938 ne->exi_root->exi_cache[i] = kmem_alloc(sizeof (avl_tree_t),
939 KM_SLEEP);
940 avl_create(ne->exi_root->exi_cache[i],
941 nfsauth_cache_clnt_compar, sizeof (struct auth_cache_clnt),
942 offsetof(struct auth_cache_clnt, authc_link));
943 }
944 rw_init(&ne->exi_root->exi_cache_lock, NULL, RW_DEFAULT, NULL);
945
946 /* setup exi_fh later, in nfs_export_get_rootfid */
947
948 rw_enter(&ne->exported_lock, RW_WRITER);
949
950 /* Publish the exportinfo in the hash table */
951 export_link(ne, ne->exi_root);
952
953 /* Initialize exi_id and exi_kstats */
954 mutex_enter(&nfs_exi_id_lock);
955 ne->exi_root->exi_id = exi_id_get_next();
956 avl_add(&exi_id_tree, ne->exi_root);
957 mutex_exit(&nfs_exi_id_lock);
958
959 rw_exit(&ne->exported_lock);
960 ne->ns_root = NULL;
961
962 ng->nfs_export = ne;
963 }
964
/*
 * During zone shutdown, remove exports
 *
 * Walks every bucket of the zone's fid hash table and calls unexport()
 * on each exportinfo found, except for ne->exi_root.  exported_lock is
 * held as reader while the chain is examined and dropped around each
 * unexport() call.  Failed unexports are counted and reported with a
 * single CE_NOTE message.
 */
void
nfs_export_zone_shutdown(nfs_globals_t *ng)
{
	nfs_export_t *ne = ng->nfs_export;
	struct exportinfo *exi, *nexi;
	int i, errors;

	rw_enter(&ne->exported_lock, RW_READER);

	errors = 0;
	for (i = 0; i < EXPTABLESIZE; i++) {

		/* Hold the head of the chain before the lock is dropped */
		exi = ne->exptable[i];
		if (exi != NULL)
			exi_hold(exi);

		while (exi != NULL) {

			/*
			 * Get and hold next export before
			 * dropping the rwlock and unexport
			 */
			nexi = exi->fid_hash.next;
			if (nexi != NULL)
				exi_hold(nexi);

			rw_exit(&ne->exported_lock);

			/*
			 * Skip ne->exi_root which gets special
			 * create/destroy handling.
			 */
			if (exi != ne->exi_root &&
			    unexport(ne, exi) != 0)
				errors++;
			/* Drop the hold taken on this entry above */
			exi_rele(exi);

			rw_enter(&ne->exported_lock, RW_READER);
			exi = nexi;
		}
	}
	if (errors > 0) {
		cmn_err(CE_NOTE,
		    "NFS: failed un-exports in zone %d",
		    (int) ng->nfs_zoneid);
	}

	rw_exit(&ne->exported_lock);
}
1017
1018 void
1019 nfs_export_zone_fini(nfs_globals_t *ng)
1020 {
1021 int i;
1022 nfs_export_t *ne = ng->nfs_export;
1023 struct exportinfo *exi;
1024
1025 ng->nfs_export = NULL;
1026
1027 rw_enter(&ne->exported_lock, RW_WRITER);
1028
1029 mutex_enter(&nfs_exi_id_lock);
1030 avl_remove(&exi_id_tree, ne->exi_root);
1031 mutex_exit(&nfs_exi_id_lock);
1032
1033 export_unlink(ne, ne->exi_root);
1034
1035 rw_exit(&ne->exported_lock);
1036
1037 /* Deallocate the place holder for the public file handle */
1038 srv_secinfo_list_free(ne->exi_root->exi_export.ex_secinfo,
1039 ne->exi_root->exi_export.ex_seccnt);
1040 mutex_destroy(&ne->exi_root->exi_lock);
1041
1042 rw_destroy(&ne->exi_root->exi_cache_lock);
1043 for (i = 0; i < AUTH_TABLESIZE; i++) {
1044 avl_destroy(ne->exi_root->exi_cache[i]);
1045 kmem_free(ne->exi_root->exi_cache[i], sizeof (avl_tree_t));
1046 }
1047
1048 kmem_free(ne->exi_root->exi_export.ex_path,
1049 ne->exi_root->exi_export.ex_pathlen + 1);
1050 kmem_free(ne->exi_root, sizeof (*ne->exi_root));
1051
1052 /*
1053 * The shutdown hook should have left the exi_id_tree
1054 * with nothing belonging to this zone.
1055 */
1056 mutex_enter(&nfs_exi_id_lock);
1057 i = 0;
1058 exi = avl_first(&exi_id_tree);
1059 while (exi != NULL) {
1060 if (exi->exi_zoneid == ng->nfs_zoneid)
1061 i++;
1062 exi = AVL_NEXT(&exi_id_tree, exi);
1063 }
1064 mutex_exit(&nfs_exi_id_lock);
1065 if (i > 0) {
1066 cmn_err(CE_NOTE,
1067 "NFS: zone %d has %d export IDs left after shutdown",
1068 (int) ng->nfs_zoneid, i);
1069 }
1070 rw_destroy(&ne->exported_lock);
1071 kmem_free(ne, sizeof (*ne));
1072 }
1073
1074 /*
1075 * Initialization routine for export routines.
1076 * Should only be called once.
1077 */
1078 void
1079 nfs_exportinit(void)
1080 {
1081 mutex_init(&nfs_exi_id_lock, NULL, MUTEX_DEFAULT, NULL);
1082
1083 /* exi_id handling initialization */
1084 exi_id_next = 0;
1085 exi_id_overflow = FALSE;
1086 avl_create(&exi_id_tree, exi_id_compar, sizeof (struct exportinfo),
1087 offsetof(struct exportinfo, exi_id_link));
1088
1089 nfslog_init();
1090 }
1091
1092 /*
1093 * Finalization routine for export routines.
1094 */
1095 void
1096 nfs_exportfini(void)
1097 {
1098 avl_destroy(&exi_id_tree);
1099 mutex_destroy(&nfs_exi_id_lock);
1100 }
1101
1102 /*
1103 * Check if 2 gss mechanism identifiers are the same.
1104 *
1105 * return FALSE if not the same.
1106 * return TRUE if the same.
1107 */
1108 static bool_t
1109 nfs_mech_equal(rpc_gss_OID mech1, rpc_gss_OID mech2)
1110 {
1111 if ((mech1->length == 0) && (mech2->length == 0))
1112 return (TRUE);
1113
1114 if (mech1->length != mech2->length)
1115 return (FALSE);
1116
1117 return (bcmp(mech1->elements, mech2->elements, mech1->length) == 0);
1118 }
1119
1120 /*
1121 * This routine is used by rpc to map rpc security number
1122 * to nfs specific security flavor number.
1123 *
1124 * The gss callback prototype is
1125 * callback(struct svc_req *, gss_cred_id_t *, gss_ctx_id_t *,
1126 * rpc_gss_lock_t *, void **),
1127 * since nfs does not use the gss_cred_id_t/gss_ctx_id_t arguments
1128 * we cast them to void.
1129 */
1130 /*ARGSUSED*/
1131 bool_t
1132 rfs_gsscallback(struct svc_req *req, gss_cred_id_t deleg, void *gss_context,
1133 rpc_gss_lock_t *lock, void **cookie)
1134 {
1135 int i, j;
1136 rpc_gss_rawcred_t *raw_cred;
1137 struct exportinfo *exi;
1138 nfs_export_t *ne = nfs_get_export();
1139
1140 /*
1141 * We don't deal with delegated credentials.
1142 */
1143 if (deleg != GSS_C_NO_CREDENTIAL)
1144 return (FALSE);
1145
1146 raw_cred = lock->raw_cred;
1147 *cookie = NULL;
1148
1149 rw_enter(&ne->exported_lock, RW_READER);
1150
1151 for (i = 0; i < EXPTABLESIZE; i++) {
1152 exi = ne->exptable[i];
1153 while (exi) {
1154 if (exi->exi_export.ex_seccnt > 0) {
1155 struct secinfo *secp;
1156 seconfig_t *se;
1157 int seccnt;
1158
1159 secp = exi->exi_export.ex_secinfo;
1160 seccnt = exi->exi_export.ex_seccnt;
1161 for (j = 0; j < seccnt; j++) {
1162 /*
1163 * If there is a map of the triplet
1164 * (mechanism, service, qop) between
1165 * raw_cred and the exported flavor,
1166 * get the psudo flavor number.
1167 * Also qop should not be NULL, it
1168 * should be "default" or something
1169 * else.
1170 */
1171 se = &secp[j].s_secinfo;
1172 if ((se->sc_rpcnum == RPCSEC_GSS) &&
1173
1174 (nfs_mech_equal(
1175 se->sc_gss_mech_type,
1176 raw_cred->mechanism)) &&
1177
1178 (se->sc_service ==
1179 raw_cred->service) &&
1180 (raw_cred->qop == se->sc_qop)) {
1181
1182 *cookie = (void *)(uintptr_t)
1183 se->sc_nfsnum;
1184 goto done;
1185 }
1186 }
1187 }
1188 exi = exi->fid_hash.next;
1189 }
1190 }
1191 done:
1192 rw_exit(&ne->exported_lock);
1193
1194 /*
1195 * If no nfs pseudo number mapping can be found in the export
1196 * table, assign the nfsflavor to NFS_FLAVOR_NOMAP. In V4, we may
1197 * recover the flavor mismatch from NFS layer (NFS4ERR_WRONGSEC).
1198 *
1199 * For example:
1200 * server first shares with krb5i;
1201 * client mounts with krb5i;
1202 * server re-shares with krb5p;
1203 * client tries with krb5i, but no mapping can be found;
1204 * rpcsec_gss module calls this routine to do the mapping,
1205 * if this routine fails, request is rejected from
1206 * the rpc layer.
1207 * What we need is to let the nfs layer rejects the request.
1208 * For V4, we can reject with NFS4ERR_WRONGSEC and the client
1209 * may recover from it by getting the new flavor via SECINFO.
1210 *
1211 * nfs pseudo number for RPCSEC_GSS mapping (see nfssec.conf)
1212 * is owned by IANA (see RFC 2623).
1213 *
1214 * XXX NFS_FLAVOR_NOMAP is defined in Solaris to work around
1215 * the implementation issue. This number should not overlap with
1216 * any new IANA defined pseudo flavor numbers.
1217 */
1218 if (*cookie == NULL)
1219 *cookie = (void *)NFS_FLAVOR_NOMAP;
1220
1221 lock->locked = TRUE;
1222
1223 return (TRUE);
1224 }
1225
1226
1227 /*
1228 * Exportfs system call; credentials should be checked before
1229 * calling this function.
1230 */
1231 int
1232 exportfs(struct exportfs_args *args, model_t model, cred_t *cr)
1233 {
1234 vnode_t *vp;
1235 vnode_t *dvp;
1236 struct exportdata *kex;
1237 struct exportinfo *exi = NULL;
1238 struct exportinfo *ex, *ex1, *ex2;
1239 fid_t fid;
1240 fsid_t fsid;
1241 int error;
1242 size_t allocsize;
1243 struct secinfo *sp;
1244 struct secinfo *exs;
1245 rpc_gss_callback_t cb;
1246 char *pathbuf;
1247 char *log_buffer;
1248 char *tagbuf;
1249 int callback;
1250 int allocd_seccnt;
1251 STRUCT_HANDLE(exportfs_args, uap);
1252 STRUCT_DECL(exportdata, uexi);
1253 struct secinfo newsec[MAX_FLAVORS];
1254 int newcnt;
1255 struct secinfo oldsec[MAX_FLAVORS];
1256 int oldcnt;
1257 int i;
1258 struct pathname lookpn;
1259 nfs_export_t *ne = nfs_get_export();
1260
1261 STRUCT_SET_HANDLE(uap, model, args);
1262
1263 /* Read in pathname from userspace */
1264 if (error = pn_get(STRUCT_FGETP(uap, dname), UIO_USERSPACE, &lookpn))
1265 return (error);
1266
1267 /* Walk the export list looking for that pathname */
1268 rw_enter(&ne->exported_lock, RW_READER);
1269 DTRACE_PROBE(nfss__i__exported_lock1_start);
1270 for (ex1 = ne->exptable_path_hash[pkp_tab_hash(lookpn.pn_path,
1271 strlen(lookpn.pn_path))]; ex1; ex1 = ex1->path_hash.next) {
1272 if (ex1 != ne->exi_root && 0 ==
1273 strcmp(ex1->exi_export.ex_path, lookpn.pn_path)) {
1274 exi_hold(ex1);
1275 break;
1276 }
1277 }
1278 DTRACE_PROBE(nfss__i__exported_lock1_stop);
1279 rw_exit(&ne->exported_lock);
1280
1281 /* Is this an unshare? */
1282 if (STRUCT_FGETP(uap, uex) == NULL) {
1283 pn_free(&lookpn);
1284 if (ex1 == NULL)
1285 return (EINVAL);
1286 error = unexport(ne, ex1);
1287 exi_rele(ex1);
1288 return (error);
1289 }
1290
1291 /* It is a share or a re-share */
1292 error = lookupname(STRUCT_FGETP(uap, dname), UIO_USERSPACE,
1293 FOLLOW, &dvp, &vp);
1294 if (error == EINVAL) {
1295 /*
1296 * if fname resolves to / we get EINVAL error
1297 * since we wanted the parent vnode. Try again
1298 * with NULL dvp.
1299 */
1300 error = lookupname(STRUCT_FGETP(uap, dname), UIO_USERSPACE,
1301 FOLLOW, NULL, &vp);
1302 dvp = NULL;
1303 }
1304 if (!error && vp == NULL) {
1305 /* Last component of fname not found */
1306 if (dvp != NULL)
1307 VN_RELE(dvp);
1308 error = ENOENT;
1309 }
1310 if (error) {
1311 pn_free(&lookpn);
1312 if (ex1)
1313 exi_rele(ex1);
1314 return (error);
1315 }
1316
1317 /*
1318 * 'vp' may be an AUTOFS node, so we perform a
1319 * VOP_ACCESS() to trigger the mount of the
1320 * intended filesystem, so we can share the intended
1321 * filesystem instead of the AUTOFS filesystem.
1322 */
1323 (void) VOP_ACCESS(vp, 0, 0, cr, NULL);
1324
1325 /*
1326 * We're interested in the top most filesystem.
1327 * This is specially important when uap->dname is a trigger
1328 * AUTOFS node, since we're really interested in sharing the
1329 * filesystem AUTOFS mounted as result of the VOP_ACCESS()
1330 * call not the AUTOFS node itself.
1331 */
1332 if (vn_mountedvfs(vp) != NULL) {
1333 if (error = traverse(&vp)) {
1334 VN_RELE(vp);
1335 if (dvp != NULL)
1336 VN_RELE(dvp);
1337 pn_free(&lookpn);
1338 if (ex1)
1339 exi_rele(ex1);
1340 return (error);
1341 }
1342 }
1343
1344 /* Do not allow sharing another vnode for already shared path */
1345 if (ex1 && !PSEUDO(ex1) && !VN_CMP(ex1->exi_vp, vp)) {
1346 VN_RELE(vp);
1347 if (dvp != NULL)
1348 VN_RELE(dvp);
1349 pn_free(&lookpn);
1350 exi_rele(ex1);
1351 return (EEXIST);
1352 }
1353 if (ex1)
1354 exi_rele(ex1);
1355
1356 /*
1357 * Get the vfs id
1358 */
1359 bzero(&fid, sizeof (fid));
1360 fid.fid_len = MAXFIDSZ;
1361 error = VOP_FID(vp, &fid, NULL);
1362 fsid = vp->v_vfsp->vfs_fsid;
1363
1364 if (error) {
1365 VN_RELE(vp);
1366 if (dvp != NULL)
1367 VN_RELE(dvp);
1368 /*
1369 * If VOP_FID returns ENOSPC then the fid supplied
1370 * is too small. For now we simply return EREMOTE.
1371 */
1372 if (error == ENOSPC)
1373 error = EREMOTE;
1374 pn_free(&lookpn);
1375 return (error);
1376 }
1377
1378 /*
1379 * Do not allow re-sharing a shared vnode under a different path
1380 * PSEUDO export has ex_path fabricated, e.g. "/tmp (pseudo)", skip it.
1381 */
1382 rw_enter(&ne->exported_lock, RW_READER);
1383 DTRACE_PROBE(nfss__i__exported_lock2_start);
1384 for (ex2 = ne->exptable[exptablehash(&fsid, &fid)]; ex2;
1385 ex2 = ex2->fid_hash.next) {
1386 if (ex2 != ne->exi_root && !PSEUDO(ex2) &&
1387 VN_CMP(ex2->exi_vp, vp) &&
1388 strcmp(ex2->exi_export.ex_path, lookpn.pn_path) != 0) {
1389 DTRACE_PROBE(nfss__i__exported_lock2_stop);
1390 rw_exit(&ne->exported_lock);
1391 VN_RELE(vp);
1392 if (dvp != NULL)
1393 VN_RELE(dvp);
1394 pn_free(&lookpn);
1395 return (EEXIST);
1396 }
1397 }
1398 DTRACE_PROBE(nfss__i__exported_lock2_stop);
1399 rw_exit(&ne->exported_lock);
1400 pn_free(&lookpn);
1401
1402 exi = kmem_zalloc(sizeof (*exi), KM_SLEEP);
1403 exi->exi_fsid = fsid;
1404 exi->exi_fid = fid;
1405 exi->exi_vp = vp;
1406 exi->exi_count = 1;
1407 exi->exi_zone = crgetzone(cr);
1408 ASSERT(exi->exi_zone != NULL); /* XXX KEBE ASKS... */
1409 ASSERT3P(exi->exi_zone, ==, curzone); /* ... are these legit? */
1410 exi->exi_volatile_dev = (vfssw[vp->v_vfsp->vfs_fstype].vsw_flag &
1411 VSW_VOLATILEDEV) ? 1 : 0;
1412 mutex_init(&exi->exi_lock, NULL, MUTEX_DEFAULT, NULL);
1413 exi->exi_dvp = dvp;
1414
1415 /*
1416 * Initialize auth cache and auth cache lock
1417 */
1418 for (i = 0; i < AUTH_TABLESIZE; i++) {
1419 exi->exi_cache[i] = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);
1420 avl_create(exi->exi_cache[i], nfsauth_cache_clnt_compar,
1421 sizeof (struct auth_cache_clnt),
1422 offsetof(struct auth_cache_clnt, authc_link));
1423 }
1424 rw_init(&exi->exi_cache_lock, NULL, RW_DEFAULT, NULL);
1425
1426 /*
1427 * Build up the template fhandle
1428 */
1429 exi->exi_fh.fh_fsid = fsid;
1430 if (exi->exi_fid.fid_len > sizeof (exi->exi_fh.fh_xdata)) {
1431 error = EREMOTE;
1432 goto out1;
1433 }
1434 exi->exi_fh.fh_xlen = exi->exi_fid.fid_len;
1435 bcopy(exi->exi_fid.fid_data, exi->exi_fh.fh_xdata,
1436 exi->exi_fid.fid_len);
1437
1438 exi->exi_fh.fh_len = sizeof (exi->exi_fh.fh_data);
1439
1440 kex = &exi->exi_export;
1441
1442 /*
1443 * Load in everything, and do sanity checking
1444 */
1445 STRUCT_INIT(uexi, model);
1446 if (copyin(STRUCT_FGETP(uap, uex), STRUCT_BUF(uexi),
1447 STRUCT_SIZE(uexi))) {
1448 error = EFAULT;
1449 goto out1;
1450 }
1451
1452 kex->ex_version = STRUCT_FGET(uexi, ex_version);
1453 if (kex->ex_version != EX_CURRENT_VERSION) {
1454 error = EINVAL;
1455 cmn_err(CE_WARN,
1456 "NFS: exportfs requires export struct version 2 - got %d\n",
1457 kex->ex_version);
1458 goto out1;
1459 }
1460
1461 /*
1462 * Must have at least one security entry
1463 */
1464 kex->ex_seccnt = STRUCT_FGET(uexi, ex_seccnt);
1465 if (kex->ex_seccnt < 1) {
1466 error = EINVAL;
1467 goto out1;
1468 }
1469
1470 kex->ex_path = STRUCT_FGETP(uexi, ex_path);
1471 kex->ex_pathlen = STRUCT_FGET(uexi, ex_pathlen);
1472 kex->ex_flags = STRUCT_FGET(uexi, ex_flags);
1473 kex->ex_anon = STRUCT_FGET(uexi, ex_anon);
1474 kex->ex_secinfo = STRUCT_FGETP(uexi, ex_secinfo);
1475 kex->ex_index = STRUCT_FGETP(uexi, ex_index);
1476 kex->ex_log_buffer = STRUCT_FGETP(uexi, ex_log_buffer);
1477 kex->ex_log_bufferlen = STRUCT_FGET(uexi, ex_log_bufferlen);
1478 kex->ex_tag = STRUCT_FGETP(uexi, ex_tag);
1479 kex->ex_taglen = STRUCT_FGET(uexi, ex_taglen);
1480
1481 /*
1482 * Copy the exported pathname into
1483 * an appropriately sized buffer.
1484 */
1485 pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1486 if (copyinstr(kex->ex_path, pathbuf, MAXPATHLEN, &kex->ex_pathlen)) {
1487 kmem_free(pathbuf, MAXPATHLEN);
1488 error = EFAULT;
1489 goto out1;
1490 }
1491 kex->ex_path = kmem_alloc(kex->ex_pathlen + 1, KM_SLEEP);
1492 bcopy(pathbuf, kex->ex_path, kex->ex_pathlen);
1493 kex->ex_path[kex->ex_pathlen] = '\0';
1494 kmem_free(pathbuf, MAXPATHLEN);
1495
1496 /*
1497 * Get the path to the logging buffer and the tag
1498 */
1499 if (kex->ex_flags & EX_LOG) {
1500 log_buffer = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1501 if (copyinstr(kex->ex_log_buffer, log_buffer, MAXPATHLEN,
1502 &kex->ex_log_bufferlen)) {
1503 kmem_free(log_buffer, MAXPATHLEN);
1504 error = EFAULT;
1505 goto out2;
1506 }
1507 kex->ex_log_buffer =
1508 kmem_alloc(kex->ex_log_bufferlen + 1, KM_SLEEP);
1509 bcopy(log_buffer, kex->ex_log_buffer, kex->ex_log_bufferlen);
1510 kex->ex_log_buffer[kex->ex_log_bufferlen] = '\0';
1511 kmem_free(log_buffer, MAXPATHLEN);
1512
1513 tagbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1514 if (copyinstr(kex->ex_tag, tagbuf, MAXPATHLEN,
1515 &kex->ex_taglen)) {
1516 kmem_free(tagbuf, MAXPATHLEN);
1517 error = EFAULT;
1518 goto out3;
1519 }
1520 kex->ex_tag = kmem_alloc(kex->ex_taglen + 1, KM_SLEEP);
1521 bcopy(tagbuf, kex->ex_tag, kex->ex_taglen);
1522 kex->ex_tag[kex->ex_taglen] = '\0';
1523 kmem_free(tagbuf, MAXPATHLEN);
1524 }
1525
1526 /*
1527 * Load the security information for each flavor
1528 */
1529 allocsize = kex->ex_seccnt * SIZEOF_STRUCT(secinfo, model);
1530 sp = kmem_zalloc(allocsize, KM_SLEEP);
1531 if (copyin(kex->ex_secinfo, sp, allocsize)) {
1532 kmem_free(sp, allocsize);
1533 error = EFAULT;
1534 goto out4;
1535 }
1536
1537 /*
1538 * All of these nested structures need to be converted to
1539 * the kernel native format.
1540 */
1541 if (model != DATAMODEL_NATIVE) {
1542 size_t allocsize2;
1543 struct secinfo *sp2;
1544
1545 allocsize2 = kex->ex_seccnt * sizeof (struct secinfo);
1546 sp2 = kmem_zalloc(allocsize2, KM_SLEEP);
1547
1548 for (i = 0; i < kex->ex_seccnt; i++) {
1549 STRUCT_HANDLE(secinfo, usi);
1550
1551 STRUCT_SET_HANDLE(usi, model,
1552 (struct secinfo *)((caddr_t)sp +
1553 (i * SIZEOF_STRUCT(secinfo, model))));
1554 bcopy(STRUCT_FGET(usi, s_secinfo.sc_name),
1555 sp2[i].s_secinfo.sc_name, MAX_NAME_LEN);
1556 sp2[i].s_secinfo.sc_nfsnum =
1557 STRUCT_FGET(usi, s_secinfo.sc_nfsnum);
1558 sp2[i].s_secinfo.sc_rpcnum =
1559 STRUCT_FGET(usi, s_secinfo.sc_rpcnum);
1560 bcopy(STRUCT_FGET(usi, s_secinfo.sc_gss_mech),
1561 sp2[i].s_secinfo.sc_gss_mech, MAX_NAME_LEN);
1562 sp2[i].s_secinfo.sc_gss_mech_type =
1563 STRUCT_FGETP(usi, s_secinfo.sc_gss_mech_type);
1564 sp2[i].s_secinfo.sc_qop =
1565 STRUCT_FGET(usi, s_secinfo.sc_qop);
1566 sp2[i].s_secinfo.sc_service =
1567 STRUCT_FGET(usi, s_secinfo.sc_service);
1568
1569 sp2[i].s_flags = STRUCT_FGET(usi, s_flags);
1570 sp2[i].s_window = STRUCT_FGET(usi, s_window);
1571 sp2[i].s_rootid = STRUCT_FGET(usi, s_rootid);
1572 sp2[i].s_rootcnt = STRUCT_FGET(usi, s_rootcnt);
1573 sp2[i].s_rootnames = STRUCT_FGETP(usi, s_rootnames);
1574 }
1575 kmem_free(sp, allocsize);
1576 sp = sp2;
1577 allocsize = allocsize2;
1578 }
1579
1580 kex->ex_secinfo = sp;
1581
1582 /*
1583 * And now copy rootnames for each individual secinfo.
1584 */
1585 callback = 0;
1586 allocd_seccnt = 0;
1587 while (allocd_seccnt < kex->ex_seccnt) {
1588
1589 exs = &sp[allocd_seccnt];
1590 if (exs->s_rootcnt > 0) {
1591 if (!sec_svc_loadrootnames(exs->s_secinfo.sc_rpcnum,
1592 exs->s_rootcnt, &exs->s_rootnames, model)) {
1593 error = EFAULT;
1594 goto out5;
1595 }
1596 }
1597
1598 if (exs->s_secinfo.sc_rpcnum == RPCSEC_GSS) {
1599 rpc_gss_OID mech_tmp;
1600 STRUCT_DECL(rpc_gss_OID_s, umech_tmp);
1601 caddr_t elements_tmp;
1602
1603 /* Copyin mechanism type */
1604 STRUCT_INIT(umech_tmp, model);
1605 mech_tmp = kmem_alloc(sizeof (*mech_tmp), KM_SLEEP);
1606 if (copyin(exs->s_secinfo.sc_gss_mech_type,
1607 STRUCT_BUF(umech_tmp), STRUCT_SIZE(umech_tmp))) {
1608 kmem_free(mech_tmp, sizeof (*mech_tmp));
1609 error = EFAULT;
1610 goto out5;
1611 }
1612 mech_tmp->length = STRUCT_FGET(umech_tmp, length);
1613 mech_tmp->elements = STRUCT_FGETP(umech_tmp, elements);
1614
1615 elements_tmp = kmem_alloc(mech_tmp->length, KM_SLEEP);
1616 if (copyin(mech_tmp->elements, elements_tmp,
1617 mech_tmp->length)) {
1618 kmem_free(elements_tmp, mech_tmp->length);
1619 kmem_free(mech_tmp, sizeof (*mech_tmp));
1620 error = EFAULT;
1621 goto out5;
1622 }
1623 mech_tmp->elements = elements_tmp;
1624 exs->s_secinfo.sc_gss_mech_type = mech_tmp;
1625 allocd_seccnt++;
1626
1627 callback = 1;
1628 } else
1629 allocd_seccnt++;
1630 }
1631
1632 /*
1633 * Init the secinfo reference count and mark these flavors
1634 * explicitly exported flavors.
1635 */
1636 for (i = 0; i < kex->ex_seccnt; i++) {
1637 kex->ex_secinfo[i].s_flags |= M_4SEC_EXPORTED;
1638 kex->ex_secinfo[i].s_refcnt = 1;
1639 }
1640
1641 /*
1642 * Set up rpcsec_gss callback routine entry if any.
1643 */
1644 if (callback) {
1645 cb.callback = rfs_gsscallback;
1646 cb.program = NFS_ACL_PROGRAM;
1647 for (cb.version = NFS_ACL_VERSMIN;
1648 cb.version <= NFS_ACL_VERSMAX; cb.version++) {
1649 (void) sec_svc_control(RPC_SVC_SET_GSS_CALLBACK,
1650 (void *)&cb);
1651 }
1652
1653 cb.program = NFS_PROGRAM;
1654 for (cb.version = NFS_VERSMIN;
1655 cb.version <= NFS_VERSMAX; cb.version++) {
1656 (void) sec_svc_control(RPC_SVC_SET_GSS_CALLBACK,
1657 (void *)&cb);
1658 }
1659 }
1660
1661 /*
1662 * Check the index flag. Do this here to avoid holding the
1663 * lock while dealing with the index option (as we do with
1664 * the public option).
1665 */
1666 if (kex->ex_flags & EX_INDEX) {
1667 if (!kex->ex_index) { /* sanity check */
1668 error = EINVAL;
1669 goto out5;
1670 }
1671 if (error = loadindex(kex))
1672 goto out5;
1673 }
1674
1675 if (kex->ex_flags & EX_LOG) {
1676 if (error = nfslog_setup(exi))
1677 goto out6;
1678 }
1679
1680 /*
1681 * Insert the new entry at the front of the export list
1682 */
1683 rw_enter(&ne->exported_lock, RW_WRITER);
1684 DTRACE_PROBE(nfss__i__exported_lock3_start);
1685
1686 export_link(ne, exi);
1687
1688 /*
1689 * Check the rest of the list for an old entry for the fs.
1690 * If one is found then unlink it, wait until this is the
1691 * only reference and then free it.
1692 */
1693 for (ex = exi->fid_hash.next; ex != NULL; ex = ex->fid_hash.next) {
1694 if (ex != ne->exi_root && VN_CMP(ex->exi_vp, vp)) {
1695 mutex_enter(&nfs_exi_id_lock);
1696 avl_remove(&exi_id_tree, ex);
1697 mutex_exit(&nfs_exi_id_lock);
1698 export_unlink(ne, ex);
1699 break;
1700 }
1701 }
1702
1703 /*
1704 * If the public filehandle is pointing at the
1705 * old entry, then point it back at the root.
1706 */
1707 if (ex != NULL && ex == ne->exi_public)
1708 ne->exi_public = ne->exi_root;
1709
1710 /*
1711 * If the public flag is on, make the global exi_public
1712 * point to this entry and turn off the public bit so that
1713 * we can distinguish it from the place holder export.
1714 */
1715 if (kex->ex_flags & EX_PUBLIC) {
1716 ne->exi_public = exi;
1717 kex->ex_flags &= ~EX_PUBLIC;
1718 }
1719
1720 #ifdef VOLATILE_FH_TEST
1721 /*
1722 * Set up the volatile_id value if volatile on share.
1723 * The list of volatile renamed filehandles is always destroyed,
1724 * if the fs was reshared.
1725 */
1726 if (kex->ex_flags & EX_VOLFH)
1727 exi->exi_volatile_id = gethrestime_sec();
1728
1729 mutex_init(&exi->exi_vol_rename_lock, NULL, MUTEX_DEFAULT, NULL);
1730 #endif /* VOLATILE_FH_TEST */
1731
1732 /*
1733 * If this is a new export, then climb up
1734 * the tree and check if any pseudo exports
1735 * need to be created to provide a path for
1736 * NFS v4 clients.
1737 */
1738 if (ex == NULL) {
1739 error = treeclimb_export(exi);
1740 if (error)
1741 goto out7;
1742 } else {
1743 /* If it's a re-export update namespace tree */
1744 exi->exi_tree = ex->exi_tree;
1745 exi->exi_tree->tree_exi = exi;
1746
1747 /* Update the change timestamp */
1748 tree_update_change(ne, exi->exi_tree, NULL);
1749 }
1750
1751 /*
1752 * build a unique flavor list from the flavors specified
1753 * in the share cmd. unique means that each flavor only
1754 * appears once in the secinfo list -- no duplicates allowed.
1755 */
1756 newcnt = build_seclist_nodups(&exi->exi_export, newsec, FALSE);
1757
1758 srv_secinfo_treeclimb(ne, exi, newsec, newcnt, TRUE);
1759
1760 /*
1761 * If re-sharing an old export entry, update the secinfo data
1762 * depending on if the old entry is a pseudo node or not.
1763 */
1764 if (ex != NULL) {
1765 oldcnt = build_seclist_nodups(&ex->exi_export, oldsec, FALSE);
1766 if (PSEUDO(ex)) {
1767 /*
1768 * The dir being shared is a pseudo export root (which
1769 * will be transformed into a real export root). The
1770 * flavor(s) of the new share were propagated to the
1771 * ancestors by srv_secinfo_treeclimb() above. Now
1772 * transfer the implicit flavor refs from the old
1773 * pseudo exprot root to the new (real) export root.
1774 */
1775 srv_secinfo_add(&exi->exi_export.ex_secinfo,
1776 &exi->exi_export.ex_seccnt, oldsec, oldcnt, TRUE);
1777 } else {
1778 /*
1779 * First transfer implicit flavor refs to new export.
1780 * Remove old flavor refs last.
1781 */
1782 srv_secinfo_exp2exp(&exi->exi_export, oldsec, oldcnt);
1783 srv_secinfo_treeclimb(ne, ex, oldsec, oldcnt, FALSE);
1784 }
1785 }
1786
1787 /*
1788 * If it's a re-export and the old entry has a pseudonode list,
1789 * transfer it to the new export.
1790 */
1791 if (ex != NULL && (ex->exi_visible != NULL)) {
1792 exi->exi_visible = ex->exi_visible;
1793 ex->exi_visible = NULL;
1794 }
1795
1796 /*
1797 * Initialize exi_id and exi_kstats
1798 */
1799 if (ex != NULL) {
1800 exi->exi_id = ex->exi_id;
1801 } else {
1802 mutex_enter(&nfs_exi_id_lock);
1803 exi->exi_id = exi_id_get_next();
1804 mutex_exit(&nfs_exi_id_lock);
1805 }
1806 mutex_enter(&nfs_exi_id_lock);
1807 avl_add(&exi_id_tree, exi);
1808 mutex_exit(&nfs_exi_id_lock);
1809
1810 DTRACE_PROBE(nfss__i__exported_lock3_stop);
1811 rw_exit(&ne->exported_lock);
1812
1813 if (ne->exi_public == exi || kex->ex_flags & EX_LOG) {
1814 /*
1815 * Log share operation to this buffer only.
1816 */
1817 nfslog_share_record(exi, cr);
1818 }
1819
1820 if (ex != NULL)
1821 exi_rele(ex);
1822
1823 return (0);
1824
1825 out7:
1826 /* Unlink the new export in exptable. */
1827 export_unlink(ne, exi);
1828 DTRACE_PROBE(nfss__i__exported_lock3_stop);
1829 rw_exit(&ne->exported_lock);
1830 out6:
1831 if (kex->ex_flags & EX_INDEX)
1832 kmem_free(kex->ex_index, strlen(kex->ex_index) + 1);
1833 out5:
1834 /* free partially completed allocation */
1835 while (--allocd_seccnt >= 0) {
1836 exs = &kex->ex_secinfo[allocd_seccnt];
1837 srv_secinfo_entry_free(exs);
1838 }
1839
1840 if (kex->ex_secinfo) {
1841 kmem_free(kex->ex_secinfo,
1842 kex->ex_seccnt * sizeof (struct secinfo));
1843 }
1844
1845 out4:
1846 if ((kex->ex_flags & EX_LOG) && kex->ex_tag != NULL)
1847 kmem_free(kex->ex_tag, kex->ex_taglen + 1);
1848 out3:
1849 if ((kex->ex_flags & EX_LOG) && kex->ex_log_buffer != NULL)
1850 kmem_free(kex->ex_log_buffer, kex->ex_log_bufferlen + 1);
1851 out2:
1852 kmem_free(kex->ex_path, kex->ex_pathlen + 1);
1853 out1:
1854 VN_RELE(vp);
1855 if (dvp != NULL)
1856 VN_RELE(dvp);
1857 mutex_destroy(&exi->exi_lock);
1858 rw_destroy(&exi->exi_cache_lock);
1859 for (i = 0; i < AUTH_TABLESIZE; i++) {
1860 avl_destroy(exi->exi_cache[i]);
1861 kmem_free(exi->exi_cache[i], sizeof (avl_tree_t));
1862 }
1863
1864 kmem_free(exi, sizeof (*exi));
1865
1866 return (error);
1867 }
1868
1869 /*
1870 * Remove the exportinfo from the export list
1871 */
1872 void
1873 export_unlink(nfs_export_t *ne, struct exportinfo *exi)
1874 {
1875 ASSERT(RW_WRITE_HELD(&ne->exported_lock));
1876
1877 exp_hash_unlink(exi, fid_hash);
1878 exp_hash_unlink(exi, path_hash);
1879 }
1880
1881 /*
1882 * Unexport an exported filesystem
1883 */
1884 static int
1885 unexport(nfs_export_t *ne, struct exportinfo *exi)
1886 {
1887 struct secinfo cursec[MAX_FLAVORS];
1888 int curcnt;
1889
1890 rw_enter(&ne->exported_lock, RW_WRITER);
1891
1892 /* Check if exi is still linked in the export table */
1893 if (!EXP_LINKED(exi) || PSEUDO(exi)) {
1894 rw_exit(&ne->exported_lock);
1895 return (EINVAL);
1896 }
1897
1898 mutex_enter(&nfs_exi_id_lock);
1899 avl_remove(&exi_id_tree, exi);
1900 mutex_exit(&nfs_exi_id_lock);
1901 export_unlink(ne, exi);
1902
1903 /*
1904 * Remove security flavors before treeclimb_unexport() is called
1905 * because srv_secinfo_treeclimb needs the namespace tree
1906 */
1907 curcnt = build_seclist_nodups(&exi->exi_export, cursec, TRUE);
1908 srv_secinfo_treeclimb(ne, exi, cursec, curcnt, FALSE);
1909
1910 /*
1911 * If there's a visible list, then need to leave
1912 * a pseudo export here to retain the visible list
1913 * for paths to exports below.
1914 */
1915 if (exi->exi_visible != NULL) {
1916 struct exportinfo *newexi;
1917
1918 newexi = pseudo_exportfs(ne, exi->exi_vp, &exi->exi_fid,
1919 exi->exi_visible, &exi->exi_export);
1920 exi->exi_visible = NULL;
1921
1922 /* interconnect the existing treenode with the new exportinfo */
1923 newexi->exi_zone = exi->exi_zone;
1924 newexi->exi_tree = exi->exi_tree;
1925 newexi->exi_tree->tree_exi = newexi;
1926
1927 /* Update the change timestamp */
1928 tree_update_change(ne, exi->exi_tree, NULL);
1929 } else {
1930 treeclimb_unexport(ne, exi);
1931 }
1932
1933 rw_exit(&ne->exported_lock);
1934
1935 /*
1936 * Need to call into the NFSv4 server and release all data
1937 * held on this particular export. This is important since
1938 * the v4 server may be holding file locks or vnodes under
1939 * this export.
1940 */
1941 rfs4_clean_state_exi(ne, exi);
1942
1943 /*
1944 * Notify the lock manager that the filesystem is being
1945 * unexported.
1946 */
1947 lm_unexport(exi);
1948
1949 /*
1950 * If this was a public export, restore
1951 * the public filehandle to the root.
1952 */
1953
1954 /*
1955 * XXX KEBE ASKS --> Should CRED() instead be
1956 * exi->exi_zone->zone_kcred?
1957 */
1958 if (exi == ne->exi_public) {
1959 ne->exi_public = ne->exi_root;
1960
1961 nfslog_share_record(ne->exi_public, CRED());
1962 }
1963
1964 if (exi->exi_export.ex_flags & EX_LOG)
1965 nfslog_unshare_record(exi, CRED());
1966
1967 exi_rele(exi);
1968 return (0);
1969 }
1970
1971 /*
1972 * Get file handle system call.
1973 * Takes file name and returns a file handle for it.
1974 * Credentials must be verified before calling.
1975 */
1976 int
1977 nfs_getfh(struct nfs_getfh_args *args, model_t model, cred_t *cr)
1978 {
1979 nfs_fh3 fh;
1980 char buf[NFS3_MAXFHSIZE];
1981 char *logptr, logbuf[NFS3_MAXFHSIZE];
1982 int l = NFS3_MAXFHSIZE;
1983 vnode_t *vp;
1984 vnode_t *dvp;
1985 struct exportinfo *exi;
1986 int error;
1987 int vers;
1988 STRUCT_HANDLE(nfs_getfh_args, uap);
1989
1990 #ifdef lint
1991 model = model; /* STRUCT macros don't always use it */
1992 #endif
1993
1994 STRUCT_SET_HANDLE(uap, model, args);
1995
1996 error = lookupname(STRUCT_FGETP(uap, fname), UIO_USERSPACE,
1997 FOLLOW, &dvp, &vp);
1998 if (error == EINVAL) {
1999 /*
2000 * if fname resolves to / we get EINVAL error
2001 * since we wanted the parent vnode. Try again
2002 * with NULL dvp.
2003 */
2004 error = lookupname(STRUCT_FGETP(uap, fname), UIO_USERSPACE,
2005 FOLLOW, NULL, &vp);
2006 dvp = NULL;
2007 }
2008 if (!error && vp == NULL) {
2009 /*
2010 * Last component of fname not found
2011 */
2012 if (dvp != NULL) {
2013 VN_RELE(dvp);
2014 }
2015 error = ENOENT;
2016 }
2017 if (error)
2018 return (error);
2019
2020 /*
2021 * 'vp' may be an AUTOFS node, so we perform a
2022 * VOP_ACCESS() to trigger the mount of the
2023 * intended filesystem, so we can share the intended
2024 * filesystem instead of the AUTOFS filesystem.
2025 */
2026 (void) VOP_ACCESS(vp, 0, 0, cr, NULL);
2027
2028 /*
2029 * We're interested in the top most filesystem.
2030 * This is specially important when uap->dname is a trigger
2031 * AUTOFS node, since we're really interested in sharing the
2032 * filesystem AUTOFS mounted as result of the VOP_ACCESS()
2033 * call not the AUTOFS node itself.
2034 */
2035 if (vn_mountedvfs(vp) != NULL) {
2036 if (error = traverse(&vp)) {
2037 VN_RELE(vp);
2038 if (dvp != NULL)
2039 VN_RELE(dvp);
2040 return (error);
2041 }
2042 }
2043
2044 vers = STRUCT_FGET(uap, vers);
2045 exi = nfs_vptoexi(dvp, vp, cr, NULL, &error, FALSE);
2046 if (!error) {
2047 if (vers == NFS_VERSION) {
2048 error = makefh((fhandle_t *)buf, vp, exi);
2049 l = NFS_FHSIZE;
2050 logptr = buf;
2051 } else if (vers == NFS_V3) {
2052 int i, sz, pad;
2053
2054 error = makefh3(&fh, vp, exi);
2055 l = RNDUP(fh.fh3_length);
2056 if (!error && (l > sizeof (fhandle3_t)))
2057 error = EREMOTE;
2058 logptr = logbuf;
2059 if (!error) {
2060 i = 0;
2061 sz = sizeof (fsid_t);
2062 bcopy(&fh.fh3_fsid, &buf[i], sz);
2063 i += sz;
2064
2065 /*
2066 * For backwards compatibility, the
2067 * fid length may be less than
2068 * NFS_FHMAXDATA, but it was always
2069 * encoded as NFS_FHMAXDATA bytes.
2070 */
2071
2072 sz = sizeof (ushort_t);
2073 bcopy(&fh.fh3_len, &buf[i], sz);
2074 i += sz;
2075 bcopy(fh.fh3_data, &buf[i], fh.fh3_len);
2076 i += fh.fh3_len;
2077 pad = (NFS_FHMAXDATA - fh.fh3_len);
2078 if (pad > 0) {
2079 bzero(&buf[i], pad);
2080 i += pad;
2081 l += pad;
2082 }
2083
2084 sz = sizeof (ushort_t);
2085 bcopy(&fh.fh3_xlen, &buf[i], sz);
2086 i += sz;
2087 bcopy(fh.fh3_xdata, &buf[i], fh.fh3_xlen);
2088 i += fh.fh3_xlen;
2089 pad = (NFS_FHMAXDATA - fh.fh3_xlen);
2090 if (pad > 0) {
2091 bzero(&buf[i], pad);
2092 i += pad;
2093 l += pad;
2094 }
2095 }
2096 /*
2097 * If we need to do NFS logging, the filehandle
2098 * must be downsized to 32 bytes.
2099 */
2100 if (!error && exi->exi_export.ex_flags & EX_LOG) {
2101 i = 0;
2102 sz = sizeof (fsid_t);
2103 bcopy(&fh.fh3_fsid, &logbuf[i], sz);
2104 i += sz;
2105 sz = sizeof (ushort_t);
2106 bcopy(&fh.fh3_len, &logbuf[i], sz);
2107 i += sz;
2108 sz = NFS_FHMAXDATA;
2109 bcopy(fh.fh3_data, &logbuf[i], sz);
2110 i += sz;
2111 sz = sizeof (ushort_t);
2112 bcopy(&fh.fh3_xlen, &logbuf[i], sz);
2113 i += sz;
2114 sz = NFS_FHMAXDATA;
2115 bcopy(fh.fh3_xdata, &logbuf[i], sz);
2116 i += sz;
2117 }
2118 }
2119 if (!error && exi->exi_export.ex_flags & EX_LOG) {
2120 nfslog_getfh(exi, (fhandle_t *)logptr,
2121 STRUCT_FGETP(uap, fname), UIO_USERSPACE, cr);
2122 }
2123 exi_rele(exi);
2124 if (!error) {
2125 if (copyout(&l, STRUCT_FGETP(uap, lenp), sizeof (int)))
2126 error = EFAULT;
2127 if (copyout(buf, STRUCT_FGETP(uap, fhp), l))
2128 error = EFAULT;
2129 }
2130 }
2131 VN_RELE(vp);
2132 if (dvp != NULL) {
2133 VN_RELE(dvp);
2134 }
2135 return (error);
2136 }
2137
2138 /*
2139 * Strategy: if vp is in the export list, then
2140 * return the associated file handle. Otherwise, ".."
2141 * once up the vp and try again, until the root of the
2142 * filesystem is reached.
2143 */
2144 struct exportinfo *
2145 nfs_vptoexi(vnode_t *dvp, vnode_t *vp, cred_t *cr, int *walk,
2146 int *err, bool_t v4srv)
2147 {
2148 fid_t fid;
2149 int error;
2150 struct exportinfo *exi;
2151
2152 ASSERT(vp);
2153 VN_HOLD(vp);
2154 if (dvp != NULL) {
2155 VN_HOLD(dvp);
2156 }
2157 if (walk != NULL)
2158 *walk = 0;
2159
2160 for (;;) {
2161 bzero(&fid, sizeof (fid));
2162 fid.fid_len = MAXFIDSZ;
2163 error = vop_fid_pseudo(vp, &fid);
2164 if (error) {
2165 /*
2166 * If vop_fid_pseudo returns ENOSPC then the fid
2167 * supplied is too small. For now we simply
2168 * return EREMOTE.
2169 */
2170 if (error == ENOSPC)
2171 error = EREMOTE;
2172 break;
2173 }
2174
2175 if (v4srv)
2176 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2177 else
2178 exi = checkexport(&vp->v_vfsp->vfs_fsid, &fid);
2179
2180 if (exi != NULL) {
2181 /*
2182 * Found the export info
2183 */
2184 break;
2185 }
2186
2187 /*
2188 * We have just failed finding a matching export.
2189 * If we're at the root of this filesystem, then
2190 * it's time to stop (with failure).
2191 */
2192 if ((vp->v_flag & VROOT) || VN_IS_CURZONEROOT(vp)) {
2193 error = EINVAL;
2194 break;
2195 }
2196
2197 if (walk != NULL)
2198 (*walk)++;
2199
2200 /*
2201 * Now, do a ".." up vp. If dvp is supplied, use it,
2202 * otherwise, look it up.
2203 */
2204 if (dvp == NULL) {
2205 error = VOP_LOOKUP(vp, "..", &dvp, NULL, 0, NULL, cr,
2206 NULL, NULL, NULL);
2207 if (error)
2208 break;
2209 }
2210 VN_RELE(vp);
2211 vp = dvp;
2212 dvp = NULL;
2213 }
2214 VN_RELE(vp);
2215 if (dvp != NULL) {
2216 VN_RELE(dvp);
2217 }
2218 if (error != 0) {
2219 if (err != NULL)
2220 *err = error;
2221 return (NULL);
2222 }
2223 return (exi);
2224 }
2225
2226 int
2227 chk_clnt_sec(exportinfo_t *exi, struct svc_req *req)
2228 {
2229 int i, nfsflavor;
2230 struct secinfo *sp;
2231
2232 /*
2233 * Get the nfs flavor number from xprt.
2234 */
2235 nfsflavor = (int)(uintptr_t)req->rq_xprt->xp_cookie;
2236
2237 sp = exi->exi_export.ex_secinfo;
2238 for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2239 if ((nfsflavor == sp[i].s_secinfo.sc_nfsnum) &&
2240 SEC_REF_EXPORTED(sp + i))
2241 return (TRUE);
2242 }
2243 return (FALSE);
2244 }
2245
2246 /*
2247 * Make an fhandle from a vnode
2248 */
2249 int
2250 makefh(fhandle_t *fh, vnode_t *vp, exportinfo_t *exi)
2251 {
2252 int error;
2253
2254 *fh = exi->exi_fh; /* struct copy */
2255
2256 error = VOP_FID(vp, (fid_t *)&fh->fh_len, NULL);
2257 if (error) {
2258 /*
2259 * Should be something other than EREMOTE
2260 */
2261 return (EREMOTE);
2262 }
2263 return (0);
2264 }
2265
2266 /*
2267 * This routine makes an overloaded V2 fhandle which contains
2268 * sec modes.
2269 *
2270 * Note that the first four octets contain the length octet,
2271 * the status octet, and two padded octets to make them XDR
2272 * four-octet aligned.
2273 *
2274 * 1 2 3 4 32
2275 * +---+---+---+---+---+---+---+---+ +---+---+---+---+ +---+
2276 * | l | s | | | sec_1 |...| sec_n |...| |
2277 * +---+---+---+---+---+---+---+---+ +---+---+---+---+ +---+
2278 *
2279 * where
2280 *
2281 * the status octet s indicates whether there are more security
2282 * flavors (1 means yes, 0 means no) that require the client to
2283 * perform another 0x81 LOOKUP to get them,
2284 *
2285 * the length octet l is the length describing the number of
2286 * valid octets that follow. (l = 4 * n, where n is the number
2287 * of security flavors sent in the current overloaded filehandle.)
2288 *
2289 * sec_index should always be in the inclusive range: [1 - ex_seccnt],
2290 * and it tells server where to start within the secinfo array.
2291 * Usually it will always be 1; however, if more flavors are used
2292 * for the public export than can be encoded in the overloaded FH
2293 * (7 for NFS2), subsequent SNEGO MCLs will have a larger index
2294 * so the server will pick up where it left off from the previous
2295 * MCL reply.
2296 *
2297 * With NFS4 support, implicitly allowed flavors are also in
2298 * the secinfo array; however, they should not be returned in
2299 * SNEGO MCL replies.
2300 */
2301 int
2302 makefh_ol(fhandle_t *fh, exportinfo_t *exi, uint_t sec_index)
2303 {
2304 secinfo_t sec[MAX_FLAVORS];
2305 int totalcnt, i, *ipt, cnt, seccnt, secidx, fh_max_cnt;
2306 char *c;
2307
2308 if (fh == NULL || exi == NULL || sec_index < 1)
2309 return (EREMOTE);
2310
2311 /*
2312 * WebNFS clients need to know the unique set of explicitly
2313 * shared flavors in used for the public export. When
2314 * "TRUE" is passed to build_seclist_nodups(), only explicitly
2315 * shared flavors are included in the list.
2316 */
2317 seccnt = build_seclist_nodups(&exi->exi_export, sec, TRUE);
2318 if (sec_index > seccnt)
2319 return (EREMOTE);
2320
2321 fh_max_cnt = (NFS_FHSIZE / sizeof (int)) - 1;
2322 totalcnt = seccnt - sec_index + 1;
2323 cnt = totalcnt > fh_max_cnt ? fh_max_cnt : totalcnt;
2324
2325 c = (char *)fh;
2326 /*
2327 * Encode the length octet representing the number of
2328 * security flavors (in bytes) in this overloaded fh.
2329 */
2330 *c = cnt * sizeof (int);
2331
2332 /*
2333 * Encode the status octet that indicates whether there
2334 * are more security flavors the client needs to get.
2335 */
2336 *(c + 1) = totalcnt > fh_max_cnt;
2337
2338 /*
2339 * put security flavors in the overloaded fh
2340 */
2341 ipt = (int *)(c + sizeof (int32_t));
2342 secidx = sec_index - 1;
2343 for (i = 0; i < cnt; i++) {
2344 ipt[i] = htonl(sec[i + secidx].s_secinfo.sc_nfsnum);
2345 }
2346 return (0);
2347 }
2348
2349 /*
2350 * Make an nfs_fh3 from a vnode
2351 */
2352 int
2353 makefh3(nfs_fh3 *fh, vnode_t *vp, struct exportinfo *exi)
2354 {
2355 int error;
2356 fid_t fid;
2357
2358 bzero(&fid, sizeof (fid));
2359 fid.fid_len = sizeof (fh->fh3_data);
2360 error = VOP_FID(vp, &fid, NULL);
2361 if (error)
2362 return (EREMOTE);
2363
2364 bzero(fh, sizeof (nfs_fh3));
2365 fh->fh3_fsid = exi->exi_fsid;
2366 fh->fh3_len = fid.fid_len;
2367 bcopy(fid.fid_data, fh->fh3_data, fh->fh3_len);
2368
2369 fh->fh3_xlen = exi->exi_fid.fid_len;
2370 ASSERT(fh->fh3_xlen <= sizeof (fh->fh3_xdata));
2371 bcopy(exi->exi_fid.fid_data, fh->fh3_xdata, fh->fh3_xlen);
2372
2373 fh->fh3_length = sizeof (fh->fh3_fsid)
2374 + sizeof (fh->fh3_len) + fh->fh3_len
2375 + sizeof (fh->fh3_xlen) + fh->fh3_xlen;
2376 fh->fh3_flags = 0;
2377
2378 return (0);
2379 }
2380
2381 /*
2382 * This routine makes an overloaded V3 fhandle which contains
2383 * sec modes.
2384 *
2385 * 1 4
2386 * +--+--+--+--+
2387 * | len |
2388 * +--+--+--+--+
2389 * up to 64
2390 * +--+--+--+--+--+--+--+--+--+--+--+--+ +--+--+--+--+
2391 * |s | | | | sec_1 | sec_2 | ... | sec_n |
2392 * +--+--+--+--+--+--+--+--+--+--+--+--+ +--+--+--+--+
2393 *
2394 * len = 4 * (n+1), where n is the number of security flavors
2395 * sent in the current overloaded filehandle.
2396 *
2397 * the status octet s indicates whether there are more security
2398 * mechanisms (1 means yes, 0 means no) that require the client
2399 * to perform another 0x81 LOOKUP to get them.
2400 *
2401 * Three octets are padded after the status octet.
2402 */
2403 int
2404 makefh3_ol(nfs_fh3 *fh, struct exportinfo *exi, uint_t sec_index)
2405 {
2406 secinfo_t sec[MAX_FLAVORS];
2407 int totalcnt, cnt, *ipt, i, seccnt, fh_max_cnt, secidx;
2408 char *c;
2409
2410 if (fh == NULL || exi == NULL || sec_index < 1)
2411 return (EREMOTE);
2412
2413 /*
2414 * WebNFS clients need to know the unique set of explicitly
2415 * shared flavors in used for the public export. When
2416 * "TRUE" is passed to build_seclist_nodups(), only explicitly
2417 * shared flavors are included in the list.
2418 */
2419 seccnt = build_seclist_nodups(&exi->exi_export, sec, TRUE);
2420
2421 if (sec_index > seccnt)
2422 return (EREMOTE);
2423
2424 fh_max_cnt = (NFS3_FHSIZE / sizeof (int)) - 1;
2425 totalcnt = seccnt - sec_index + 1;
2426 cnt = totalcnt > fh_max_cnt ? fh_max_cnt : totalcnt;
2427
2428 /*
2429 * Place the length in fh3_length representing the number
2430 * of security flavors (in bytes) in this overloaded fh.
2431 */
2432 fh->fh3_flags = FH_WEBNFS;
2433 fh->fh3_length = (cnt+1) * sizeof (int32_t);
2434
2435 c = (char *)&fh->fh3_u.nfs_fh3_i.fh3_i;
2436 /*
2437 * Encode the status octet that indicates whether there
2438 * are more security flavors the client needs to get.
2439 */
2440 *c = totalcnt > fh_max_cnt;
2441
2442 /*
2443 * put security flavors in the overloaded fh
2444 */
2445 secidx = sec_index - 1;
2446 ipt = (int *)(c + sizeof (int32_t));
2447 for (i = 0; i < cnt; i++) {
2448 ipt[i] = htonl(sec[i + secidx].s_secinfo.sc_nfsnum);
2449 }
2450 return (0);
2451 }
2452
2453 /*
2454 * Make an nfs_fh4 from a vnode
2455 */
2456 int
2457 makefh4(nfs_fh4 *fh, vnode_t *vp, struct exportinfo *exi)
2458 {
2459 int error;
2460 nfs_fh4_fmt_t *fh_fmtp = (nfs_fh4_fmt_t *)fh->nfs_fh4_val;
2461 fid_t fid;
2462
2463 bzero(&fid, sizeof (fid));
2464 fid.fid_len = MAXFIDSZ;
2465 /*
2466 * vop_fid_pseudo() is used to set up NFSv4 namespace, so
2467 * use vop_fid_pseudo() here to get the fid instead of VOP_FID.
2468 */
2469 error = vop_fid_pseudo(vp, &fid);
2470 if (error)
2471 return (error);
2472
2473 fh->nfs_fh4_len = NFS_FH4_LEN;
2474
2475 fh_fmtp->fh4_i.fhx_fsid = exi->exi_fh.fh_fsid;
2476 fh_fmtp->fh4_i.fhx_xlen = exi->exi_fh.fh_xlen;
2477
2478 bzero(fh_fmtp->fh4_i.fhx_data, sizeof (fh_fmtp->fh4_i.fhx_data));
2479 bzero(fh_fmtp->fh4_i.fhx_xdata, sizeof (fh_fmtp->fh4_i.fhx_xdata));
2480 ASSERT(exi->exi_fh.fh_xlen <= sizeof (fh_fmtp->fh4_i.fhx_xdata));
2481 bcopy(exi->exi_fh.fh_xdata, fh_fmtp->fh4_i.fhx_xdata,
2482 exi->exi_fh.fh_xlen);
2483
2484 fh_fmtp->fh4_len = fid.fid_len;
2485 ASSERT(fid.fid_len <= sizeof (fh_fmtp->fh4_data));
2486 bcopy(fid.fid_data, fh_fmtp->fh4_data, fid.fid_len);
2487 fh_fmtp->fh4_flag = 0;
2488
2489 #ifdef VOLATILE_FH_TEST
2490 /*
2491 * XXX (temporary?)
2492 * Use the rnode volatile_id value to add volatility to the fh.
2493 *
2494 * For testing purposes there are currently two scenarios, based
2495 * on whether the filesystem was shared with "volatile_fh"
2496 * or "expire_on_rename". In the first case, use the value of
2497 * export struct share_time as the volatile_id. In the second
2498 * case use the vnode volatile_id value (which is set to the
2499 * time in which the file was renamed).
2500 *
2501 * Note that the above are temporary constructs for testing only
2502 * XXX
2503 */
2504 if (exi->exi_export.ex_flags & EX_VOLRNM) {
2505 fh_fmtp->fh4_volatile_id = find_volrnm_fh_id(exi, fh);
2506 } else if (exi->exi_export.ex_flags & EX_VOLFH) {
2507 fh_fmtp->fh4_volatile_id = exi->exi_volatile_id;
2508 } else {
2509 fh_fmtp->fh4_volatile_id = 0;
2510 }
2511 #endif /* VOLATILE_FH_TEST */
2512
2513 return (0);
2514 }
2515
2516 /*
2517 * Convert an fhandle into a vnode.
2518 * Uses the file id (fh_len + fh_data) in the fhandle to get the vnode.
2519 * WARNING: users of this routine must do a VN_RELE on the vnode when they
2520 * are done with it.
2521 */
2522 vnode_t *
2523 nfs_fhtovp(fhandle_t *fh, struct exportinfo *exi)
2524 {
2525 vfs_t *vfsp;
2526 vnode_t *vp;
2527 int error;
2528 fid_t *fidp;
2529
2530 TRACE_0(TR_FAC_NFS, TR_FHTOVP_START,
2531 "fhtovp_start");
2532
2533 if (exi == NULL) {
2534 TRACE_1(TR_FAC_NFS, TR_FHTOVP_END,
2535 "fhtovp_end:(%S)", "exi NULL");
2536 return (NULL); /* not exported */
2537 }
2538
2539 ASSERT(exi->exi_vp != NULL);
2540
2541 if (PUBLIC_FH2(fh)) {
2542 if (exi->exi_export.ex_flags & EX_PUBLIC) {
2543 TRACE_1(TR_FAC_NFS, TR_FHTOVP_END,
2544 "fhtovp_end:(%S)", "root not exported");
2545 return (NULL);
2546 }
2547 vp = exi->exi_vp;
2548 VN_HOLD(vp);
2549 return (vp);
2550 }
2551
2552 vfsp = exi->exi_vp->v_vfsp;
2553 ASSERT(vfsp != NULL);
2554 fidp = (fid_t *)&fh->fh_len;
2555
2556 error = VFS_VGET(vfsp, &vp, fidp);
2557 if (error || vp == NULL) {
2558 TRACE_1(TR_FAC_NFS, TR_FHTOVP_END,
2559 "fhtovp_end:(%S)", "VFS_GET failed or vp NULL");
2560 return (NULL);
2561 }
2562 TRACE_1(TR_FAC_NFS, TR_FHTOVP_END,
2563 "fhtovp_end:(%S)", "end");
2564 return (vp);
2565 }
2566
2567 /*
2568 * Convert an nfs_fh3 into a vnode.
2569 * Uses the file id (fh_len + fh_data) in the file handle to get the vnode.
2570 * WARNING: users of this routine must do a VN_RELE on the vnode when they
2571 * are done with it.
2572 */
2573 vnode_t *
2574 nfs3_fhtovp(nfs_fh3 *fh, struct exportinfo *exi)
2575 {
2576 vfs_t *vfsp;
2577 vnode_t *vp;
2578 int error;
2579 fid_t *fidp;
2580
2581 if (exi == NULL)
2582 return (NULL); /* not exported */
2583
2584 ASSERT(exi->exi_vp != NULL);
2585
2586 if (PUBLIC_FH3(fh)) {
2587 if (exi->exi_export.ex_flags & EX_PUBLIC)
2588 return (NULL);
2589 vp = exi->exi_vp;
2590 VN_HOLD(vp);
2591 return (vp);
2592 }
2593
2594 if (fh->fh3_length < NFS3_OLDFHSIZE ||
2595 fh->fh3_length > NFS3_MAXFHSIZE)
2596 return (NULL);
2597
2598 vfsp = exi->exi_vp->v_vfsp;
2599 ASSERT(vfsp != NULL);
2600 fidp = FH3TOFIDP(fh);
2601
2602 error = VFS_VGET(vfsp, &vp, fidp);
2603 if (error || vp == NULL)
2604 return (NULL);
2605
2606 return (vp);
2607 }
2608
2609 /*
2610 * Convert an nfs_fh4 into a vnode.
2611 * Uses the file id (fh_len + fh_data) in the file handle to get the vnode.
2612 * WARNING: users of this routine must do a VN_RELE on the vnode when they
2613 * are done with it.
2614 */
2615 vnode_t *
2616 nfs4_fhtovp(nfs_fh4 *fh, struct exportinfo *exi, nfsstat4 *statp)
2617 {
2618 vfs_t *vfsp;
2619 vnode_t *vp = NULL;
2620 int error;
2621 fid_t *fidp;
2622 nfs_fh4_fmt_t *fh_fmtp;
2623 #ifdef VOLATILE_FH_TEST
2624 uint32_t volatile_id = 0;
2625 #endif /* VOLATILE_FH_TEST */
2626
2627 if (exi == NULL) {
2628 *statp = NFS4ERR_STALE;
2629 return (NULL); /* not exported */
2630 }
2631 ASSERT(exi->exi_vp != NULL);
2632
2633 /* caller should have checked this */
2634 ASSERT(fh->nfs_fh4_len >= NFS_FH4_LEN);
2635
2636 fh_fmtp = (nfs_fh4_fmt_t *)fh->nfs_fh4_val;
2637 vfsp = exi->exi_vp->v_vfsp;
2638 ASSERT(vfsp != NULL);
2639 fidp = (fid_t *)&fh_fmtp->fh4_len;
2640
2641 #ifdef VOLATILE_FH_TEST
2642 /* XXX check if volatile - should be changed later */
2643 if (exi->exi_export.ex_flags & (EX_VOLRNM | EX_VOLFH)) {
2644 /*
2645 * Filesystem is shared with volatile filehandles
2646 */
2647 if (exi->exi_export.ex_flags & EX_VOLRNM)
2648 volatile_id = find_volrnm_fh_id(exi, fh);
2649 else
2650 volatile_id = exi->exi_volatile_id;
2651
2652 if (fh_fmtp->fh4_volatile_id != volatile_id) {
2653 *statp = NFS4ERR_FHEXPIRED;
2654 return (NULL);
2655 }
2656 }
2657 /*
2658 * XXX even if test_volatile_fh false, the fh may contain a
2659 * volatile id if obtained when the test was set.
2660 */
2661 fh_fmtp->fh4_volatile_id = (uchar_t)0;
2662 #endif /* VOLATILE_FH_TEST */
2663
2664 error = VFS_VGET(vfsp, &vp, fidp);
2665 /*
2666 * If we can not get vp from VFS_VGET, perhaps this is
2667 * an nfs v2/v3/v4 node in an nfsv4 pseudo filesystem.
2668 * Check it out.
2669 */
2670 if (error && PSEUDO(exi))
2671 error = nfs4_vget_pseudo(exi, &vp, fidp);
2672
2673 if (error || vp == NULL) {
2674 *statp = NFS4ERR_STALE;
2675 return (NULL);
2676 }
2677 /* XXX - disgusting hack */
2678 if (vp->v_type == VNON && vp->v_flag & V_XATTRDIR)
2679 vp->v_type = VDIR;
2680 *statp = NFS4_OK;
2681 return (vp);
2682 }
2683
2684 /*
2685 * Find the export structure associated with the given filesystem.
2686 * If found, then increment the ref count (exi_count).
2687 */
2688 struct exportinfo *
2689 checkexport(fsid_t *fsid, fid_t *fid)
2690 {
2691 struct exportinfo *exi;
2692 nfs_export_t *ne = nfs_get_export();
2693
2694 rw_enter(&ne->exported_lock, RW_READER);
2695 for (exi = ne->exptable[exptablehash(fsid, fid)];
2696 exi != NULL;
2697 exi = exi->fid_hash.next) {
2698 if (exportmatch(exi, fsid, fid)) {
2699 /*
2700 * If this is the place holder for the
2701 * public file handle, then return the
2702 * real export entry for the public file
2703 * handle.
2704 */
2705 if (exi->exi_export.ex_flags & EX_PUBLIC) {
2706 exi = ne->exi_public;
2707 }
2708
2709 exi_hold(exi);
2710 rw_exit(&ne->exported_lock);
2711 return (exi);
2712 }
2713 }
2714 rw_exit(&ne->exported_lock);
2715 return (NULL);
2716 }
2717
2718
2719 /*
2720 * "old school" version of checkexport() for NFS4. NFS4
2721 * rfs4_compound holds exported_lock for duration of compound
2722 * processing. This version doesn't manipulate exi_count
2723 * since NFS4 breaks fundamental assumptions in the exi_count
2724 * design.
2725 */
2726 struct exportinfo *
2727 checkexport4(fsid_t *fsid, fid_t *fid, vnode_t *vp)
2728 {
2729 struct exportinfo *exi;
2730 nfs_export_t *ne = nfs_get_export();
2731
2732 ASSERT(RW_LOCK_HELD(&ne->exported_lock));
2733
2734 for (exi = ne->exptable[exptablehash(fsid, fid)];
2735 exi != NULL;
2736 exi = exi->fid_hash.next) {
2737 if (exportmatch(exi, fsid, fid)) {
2738 /*
2739 * If this is the place holder for the
2740 * public file handle, then return the
2741 * real export entry for the public file
2742 * handle.
2743 */
2744 if (exi->exi_export.ex_flags & EX_PUBLIC) {
2745 exi = ne->exi_public;
2746 }
2747
2748 /*
2749 * If vp is given, check if vp is the
2750 * same vnode as the exported node.
2751 *
2752 * Since VOP_FID of a lofs node returns the
2753 * fid of its real node (ufs), the exported
2754 * node for lofs and (pseudo) ufs may have
2755 * the same fsid and fid.
2756 */
2757 if (vp == NULL || vp == exi->exi_vp)
2758 return (exi);
2759 }
2760 }
2761
2762 return (NULL);
2763 }
2764
2765 /*
2766 * Free an entire export list node
2767 */
2768 void
2769 exportfree(struct exportinfo *exi)
2770 {
2771 struct exportdata *ex;
2772 struct charset_cache *cache;
2773 int i;
2774
2775 ex = &exi->exi_export;
2776
2777 ASSERT(exi->exi_vp != NULL && !(exi->exi_export.ex_flags & EX_PUBLIC));
2778 VN_RELE(exi->exi_vp);
2779 if (exi->exi_dvp != NULL)
2780 VN_RELE(exi->exi_dvp);
2781
2782 if (ex->ex_flags & EX_INDEX)
2783 kmem_free(ex->ex_index, strlen(ex->ex_index) + 1);
2784
2785 kmem_free(ex->ex_path, ex->ex_pathlen + 1);
2786 nfsauth_cache_free(exi);
2787
2788 /*
2789 * if there is a character set mapping cached, clean it up.
2790 */
2791 for (cache = exi->exi_charset; cache != NULL;
2792 cache = exi->exi_charset) {
2793 if (cache->inbound != (kiconv_t)-1)
2794 (void) kiconv_close(cache->inbound);
2795 if (cache->outbound != (kiconv_t)-1)
2796 (void) kiconv_close(cache->outbound);
2797 exi->exi_charset = cache->next;
2798 kmem_free(cache, sizeof (struct charset_cache));
2799 }
2800
2801 if (exi->exi_logbuffer != NULL)
2802 nfslog_disable(exi);
2803
2804 if (ex->ex_flags & EX_LOG) {
2805 kmem_free(ex->ex_log_buffer, ex->ex_log_bufferlen + 1);
2806 kmem_free(ex->ex_tag, ex->ex_taglen + 1);
2807 }
2808
2809 if (exi->exi_visible)
2810 free_visible(exi->exi_visible);
2811
2812 srv_secinfo_list_free(ex->ex_secinfo, ex->ex_seccnt);
2813
2814 #ifdef VOLATILE_FH_TEST
2815 free_volrnm_list(exi);
2816 mutex_destroy(&exi->exi_vol_rename_lock);
2817 #endif /* VOLATILE_FH_TEST */
2818
2819 mutex_destroy(&exi->exi_lock);
2820 rw_destroy(&exi->exi_cache_lock);
2821 /*
2822 * All nodes in the exi_cache AVL trees were removed and freed in the
2823 * nfsauth_cache_free() call above. We will just destroy and free the
2824 * empty AVL trees here.
2825 */
2826 for (i = 0; i < AUTH_TABLESIZE; i++) {
2827 avl_destroy(exi->exi_cache[i]);
2828 kmem_free(exi->exi_cache[i], sizeof (avl_tree_t));
2829 }
2830
2831 kmem_free(exi, sizeof (*exi));
2832 }
2833
2834 /*
2835 * load the index file from user space into kernel space.
2836 */
2837 static int
2838 loadindex(struct exportdata *kex)
2839 {
2840 int error;
2841 char index[MAXNAMELEN+1];
2842 size_t len;
2843
2844 /*
2845 * copyinstr copies the complete string including the NULL and
2846 * returns the len with the NULL byte included in the calculation
2847 * as long as the max length is not exceeded.
2848 */
2849 if (error = copyinstr(kex->ex_index, index, sizeof (index), &len))
2850 return (error);
2851
2852 kex->ex_index = kmem_alloc(len, KM_SLEEP);
2853 bcopy(index, kex->ex_index, len);
2854
2855 return (0);
2856 }
2857
2858 void
2859 exi_hold(struct exportinfo *exi)
2860 {
2861 mutex_enter(&exi->exi_lock);
2862 exi->exi_count++;
2863 mutex_exit(&exi->exi_lock);
2864 }
2865
2866 /*
2867 * When a thread completes using exi, it should call exi_rele().
2868 * exi_rele() decrements exi_count. It releases exi if exi_count == 0, i.e.
2869 * if this is the last user of exi and exi is not on exportinfo list anymore
2870 */
2871 void
2872 exi_rele(struct exportinfo *exi)
2873 {
2874 mutex_enter(&exi->exi_lock);
2875 exi->exi_count--;
2876 if (exi->exi_count == 0) {
2877 mutex_exit(&exi->exi_lock);
2878 exportfree(exi);
2879 } else
2880 mutex_exit(&exi->exi_lock);
2881 }
2882
2883 #ifdef VOLATILE_FH_TEST
2884 /*
2885 * Test for volatile fh's - add file handle to list and set its volatile id
2886 * to time it was renamed. If EX_VOLFH is also on and the fs is reshared,
2887 * the vol_rename queue is purged.
2888 *
2889 * XXX This code is for unit testing purposes only... To correctly use it, it
2890 * needs to tie a rename list to the export struct and (more
2891 * important), protect access to the exi rename list using a write lock.
2892 */
2893
2894 /*
2895 * get the fh vol record if it's in the volatile on rename list. Don't check
2896 * volatile_id in the file handle - compare only the file handles.
2897 */
2898 static struct ex_vol_rename *
2899 find_volrnm_fh(struct exportinfo *exi, nfs_fh4 *fh4p)
2900 {
2901 struct ex_vol_rename *p = NULL;
2902 fhandle4_t *fhp;
2903
2904 /* XXX shouldn't we assert &exported_lock held? */
2905 ASSERT(MUTEX_HELD(&exi->exi_vol_rename_lock));
2906
2907 if (fh4p->nfs_fh4_len != NFS_FH4_LEN) {
2908 return (NULL);
2909 }
2910 fhp = &((nfs_fh4_fmt_t *)fh4p->nfs_fh4_val)->fh4_i;
2911 for (p = exi->exi_vol_rename; p != NULL; p = p->vrn_next) {
2912 if (bcmp(fhp, &p->vrn_fh_fmt.fh4_i,
2913 sizeof (fhandle4_t)) == 0)
2914 break;
2915 }
2916 return (p);
2917 }
2918
2919 /*
2920 * get the volatile id for the fh (if there is - else return 0). Ignore the
2921 * volatile_id in the file handle - compare only the file handles.
2922 */
2923 static uint32_t
2924 find_volrnm_fh_id(struct exportinfo *exi, nfs_fh4 *fh4p)
2925 {
2926 struct ex_vol_rename *p;
2927 uint32_t volatile_id;
2928
2929 mutex_enter(&exi->exi_vol_rename_lock);
2930 p = find_volrnm_fh(exi, fh4p);
2931 volatile_id = (p ? p->vrn_fh_fmt.fh4_volatile_id :
2932 exi->exi_volatile_id);
2933 mutex_exit(&exi->exi_vol_rename_lock);
2934 return (volatile_id);
2935 }
2936
2937 /*
2938 * Free the volatile on rename list - will be called if a filesystem is
2939 * unshared or reshared without EX_VOLRNM
2940 */
2941 static void
2942 free_volrnm_list(struct exportinfo *exi)
2943 {
2944 struct ex_vol_rename *p, *pnext;
2945
2946 /* no need to hold mutex lock - this one is called from exportfree */
2947 for (p = exi->exi_vol_rename; p != NULL; p = pnext) {
2948 pnext = p->vrn_next;
2949 kmem_free(p, sizeof (*p));
2950 }
2951 exi->exi_vol_rename = NULL;
2952 }
2953
2954 /*
2955 * Add a file handle to the volatile on rename list.
2956 */
2957 void
2958 add_volrnm_fh(struct exportinfo *exi, vnode_t *vp)
2959 {
2960 struct ex_vol_rename *p;
2961 char fhbuf[NFS4_FHSIZE];
2962 nfs_fh4 fh4;
2963 int error;
2964
2965 fh4.nfs_fh4_val = fhbuf;
2966 error = makefh4(&fh4, vp, exi);
2967 if ((error) || (fh4.nfs_fh4_len != sizeof (p->vrn_fh_fmt))) {
2968 return;
2969 }
2970
2971 mutex_enter(&exi->exi_vol_rename_lock);
2972
2973 p = find_volrnm_fh(exi, &fh4);
2974
2975 if (p == NULL) {
2976 p = kmem_alloc(sizeof (*p), KM_SLEEP);
2977 bcopy(fh4.nfs_fh4_val, &p->vrn_fh_fmt, sizeof (p->vrn_fh_fmt));
2978 p->vrn_next = exi->exi_vol_rename;
2979 exi->exi_vol_rename = p;
2980 }
2981
2982 p->vrn_fh_fmt.fh4_volatile_id = gethrestime_sec();
2983 mutex_exit(&exi->exi_vol_rename_lock);
2984 }
2985
2986 #endif /* VOLATILE_FH_TEST */