1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 * Copyright (c) 2013 Joyent, Inc. All rights reserved.
25 */
26
27 /*
28 * Datalink management routines.
29 */
30
31 #include <sys/types.h>
32 #include <sys/door.h>
33 #include <sys/zone.h>
34 #include <sys/modctl.h>
35 #include <sys/file.h>
36 #include <sys/modhash.h>
37 #include <sys/kstat.h>
38 #include <sys/vnode.h>
39 #include <sys/cmn_err.h>
40 #include <sys/softmac.h>
41 #include <sys/dls.h>
42 #include <sys/dls_impl.h>
43 #include <sys/stropts.h>
44 #include <sys/netstack.h>
45 #include <inet/iptun/iptun_impl.h>
46
47 /*
48 * This vanity name management module is treated as part of the GLD framework
49 * and we don't hold any GLD framework lock across a call to any mac
50 * function that needs to acquire the mac perimeter. The hierarchy is
51 * mac perimeter -> framework locks
52 */
53
/*
 * Per-netstack DLS state.  An instance is allocated by dls_stack_init() and
 * freed by dls_stack_fini().
 */
typedef struct dls_stack {
	zoneid_t	dlss_zoneid;	/* zone derived from the netstack id */
} dls_stack_t;
57
/* kmem cache from which dls_devnet_t structures are allocated */
static kmem_cache_t	*i_dls_devnet_cachep;
/* Protects dls_mgmt_dh, the upcall door handle */
static kmutex_t		i_dls_mgmt_lock;
/* Held while the two devnet hash tables below are searched or modified */
static krwlock_t	i_dls_devnet_lock;
/* dls_devnet_t hash keyed by dd_linkid */
static mod_hash_t	*i_dls_devnet_id_hash;
/* dls_devnet_t hash keyed by dd_mac */
static mod_hash_t	*i_dls_devnet_hash;

/* Set to B_TRUE whenever an entry is added to or removed from the id hash */
boolean_t		devnet_need_rebuild;

/* Bucket count used for both devnet hash tables */
#define	VLAN_HASHSZ	67	/* prime */
67
/*
 * The following macros take a link name without the trailing PPA as input.
 * Opening a /dev/net node with one of these names causes a tunnel link to be
 * implicitly created in dls_devnet_hold_by_name() for backward compatibility
 * with Solaris 10 and prior.
 */
/* True when name is exactly "ip.tun" */
#define	IS_IPV4_TUN(name)	(strcmp((name), "ip.tun") == 0)
/* True when name is exactly "ip6.tun" */
#define	IS_IPV6_TUN(name)	(strcmp((name), "ip6.tun") == 0)
/* True when name is exactly "ip.6to4tun" */
#define	IS_6TO4_TUN(name)	(strcmp((name), "ip.6to4tun") == 0)
/* True when name matches any of the three tunnel names above */
#define	IS_IPTUN_LINK(name)	(					\
	IS_IPV4_TUN(name) || IS_IPV6_TUN(name) || IS_6TO4_TUN(name))
79
/*
 * Upcall door handle.  Opened and released by dls_mgmt_door_set(); read by
 * i_dls_mgmt_upcall().  Both do so while holding i_dls_mgmt_lock.
 */
static door_handle_t	dls_mgmt_dh = NULL;

/*
 * Values for dd_flags.
 */
/* Set by dls_devnet_unset(); new holds and kstat updates fail with ENOENT */
#define	DD_CONDEMNED		0x1
/* While set, dls_devnet_stat_update() fails with ENOENT */
#define	DD_KSTAT_CHANGING	0x2
#define	DD_IMPLICIT_IPTUN	0x4 /* Implicitly-created ip*.*tun* tunnel */
86
/*
 * This structure is used to keep the <linkid, macname> mapping.
 * This structure itself is not protected by the mac perimeter, but is
 * protected by the dd_mutex and i_dls_devnet_lock. Thus most of the
 * functions manipulating this structure such as dls_devnet_set/unset etc.
 * may be called while not holding the mac perimeter.
 */
typedef struct dls_devnet_s {
	/* DATALINK_INVALID_LINKID while no linkid is associated */
	datalink_id_t	dd_linkid;
	/* link name as reported by dls_mgmt_get_linkinfo() */
	char		dd_linkname[MAXLINKNAMELEN];
	/* MAC name; this buffer is the key in i_dls_devnet_hash */
	char		dd_mac[MAXNAMELEN];
	kstat_t		*dd_ksp;	/* kstat in owner_zid */
	kstat_t		*dd_zone_ksp;	/* in dd_zid if != owner_zid */
	/* long-term references; dls_devnet_unset() expects exactly one */
	uint32_t	dd_ref;
	kmutex_t	dd_mutex;
	/* signalled when dd_tref reaches zero or the property task finishes */
	kcondvar_t	dd_cv;
	/* temporary references; see dls_devnet_hold_tmp()/rele_tmp() */
	uint32_t	dd_tref;
	uint_t		dd_flags;	/* DD_* flags */
	zoneid_t	dd_owner_zid;	/* zone where node was created */
	zoneid_t	dd_zid;		/* current zone */
	/* B_TRUE once dls_devnet_prop_task() has run for this link */
	boolean_t	dd_prop_loaded;
	/* id of the dispatched property task; cleared when it completes */
	taskqid_t	dd_prop_taskid;
	boolean_t	dd_transient;	/* link goes away when zone does */
} dls_devnet_t;
111
/*
 * Forward declarations for static functions that are referenced before
 * their definitions.
 */
static int i_dls_devnet_create_iptun(const char *, const char *,
    datalink_id_t *);
static int i_dls_devnet_destroy_iptun(datalink_id_t);
static int i_dls_devnet_setzid(dls_devnet_t *, zoneid_t, boolean_t, boolean_t);
static int dls_devnet_unset(const char *, datalink_id_t *, boolean_t);
117
118 /*ARGSUSED*/
119 static int
120 i_dls_devnet_constructor(void *buf, void *arg, int kmflag)
121 {
122 dls_devnet_t *ddp = buf;
123
124 bzero(buf, sizeof (dls_devnet_t));
125 mutex_init(&ddp->dd_mutex, NULL, MUTEX_DEFAULT, NULL);
126 cv_init(&ddp->dd_cv, NULL, CV_DEFAULT, NULL);
127 return (0);
128 }
129
130 /*ARGSUSED*/
131 static void
132 i_dls_devnet_destructor(void *buf, void *arg)
133 {
134 dls_devnet_t *ddp = buf;
135
136 ASSERT(ddp->dd_ksp == NULL);
137 ASSERT(ddp->dd_ref == 0);
138 ASSERT(ddp->dd_tref == 0);
139 mutex_destroy(&ddp->dd_mutex);
140 cv_destroy(&ddp->dd_cv);
141 }
142
143 /* ARGSUSED */
144 static int
145 dls_zone_remove(datalink_id_t linkid, void *arg)
146 {
147 dls_devnet_t *ddp;
148
149 if (dls_devnet_hold_tmp(linkid, &ddp) == 0) {
150 /*
151 * Don't bother moving transient links back to the global zone
152 * since we will simply delete them in dls_devnet_unset.
153 */
154 if (!ddp->dd_transient)
155 (void) dls_devnet_setzid(ddp, GLOBAL_ZONEID, B_FALSE);
156 dls_devnet_rele_tmp(ddp);
157 }
158 return (0);
159 }
160
161 /* ARGSUSED */
162 static void *
163 dls_stack_init(netstackid_t stackid, netstack_t *ns)
164 {
165 dls_stack_t *dlss;
166
167 dlss = kmem_zalloc(sizeof (*dlss), KM_SLEEP);
168 dlss->dlss_zoneid = netstackid_to_zoneid(stackid);
169 return (dlss);
170 }
171
172 /* ARGSUSED */
173 static void
174 dls_stack_shutdown(netstackid_t stackid, void *arg)
175 {
176 dls_stack_t *dlss = (dls_stack_t *)arg;
177
178 /* Move remaining datalinks in this zone back to the global zone. */
179 (void) zone_datalink_walk(dlss->dlss_zoneid, dls_zone_remove, NULL);
180 }
181
182 /* ARGSUSED */
183 static void
184 dls_stack_fini(netstackid_t stackid, void *arg)
185 {
186 dls_stack_t *dlss = (dls_stack_t *)arg;
187
188 kmem_free(dlss, sizeof (*dlss));
189 }
190
191 /*
192 * Module initialization and finalization functions.
193 */
194 void
195 dls_mgmt_init(void)
196 {
197 mutex_init(&i_dls_mgmt_lock, NULL, MUTEX_DEFAULT, NULL);
198 rw_init(&i_dls_devnet_lock, NULL, RW_DEFAULT, NULL);
199
200 /*
201 * Create a kmem_cache of dls_devnet_t structures.
202 */
203 i_dls_devnet_cachep = kmem_cache_create("dls_devnet_cache",
204 sizeof (dls_devnet_t), 0, i_dls_devnet_constructor,
205 i_dls_devnet_destructor, NULL, NULL, NULL, 0);
206 ASSERT(i_dls_devnet_cachep != NULL);
207
208 /*
209 * Create a hash table, keyed by dd_linkid, of dls_devnet_t.
210 */
211 i_dls_devnet_id_hash = mod_hash_create_idhash("dls_devnet_id_hash",
212 VLAN_HASHSZ, mod_hash_null_valdtor);
213
214 /*
215 * Create a hash table, keyed by dd_mac
216 */
217 i_dls_devnet_hash = mod_hash_create_extended("dls_devnet_hash",
218 VLAN_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor,
219 mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
220
221 devnet_need_rebuild = B_FALSE;
222
223 netstack_register(NS_DLS, dls_stack_init, dls_stack_shutdown,
224 dls_stack_fini);
225 }
226
227 void
228 dls_mgmt_fini(void)
229 {
230 netstack_unregister(NS_DLS);
231 mod_hash_destroy_hash(i_dls_devnet_hash);
232 mod_hash_destroy_hash(i_dls_devnet_id_hash);
233 kmem_cache_destroy(i_dls_devnet_cachep);
234 rw_destroy(&i_dls_devnet_lock);
235 mutex_destroy(&i_dls_mgmt_lock);
236 }
237
238 int
239 dls_mgmt_door_set(boolean_t start)
240 {
241 int err;
242
243 /* handle daemon restart */
244 mutex_enter(&i_dls_mgmt_lock);
245 if (dls_mgmt_dh != NULL) {
246 door_ki_rele(dls_mgmt_dh);
247 dls_mgmt_dh = NULL;
248 }
249
250 if (start && ((err = door_ki_open(DLMGMT_DOOR, &dls_mgmt_dh)) != 0)) {
251 mutex_exit(&i_dls_mgmt_lock);
252 return (err);
253 }
254
255 mutex_exit(&i_dls_mgmt_lock);
256
257 /*
258 * Create and associate <link name, linkid> mapping for network devices
259 * which are already attached before the daemon is started.
260 */
261 if (start)
262 softmac_recreate();
263 return (0);
264 }
265
266 static boolean_t
267 i_dls_mgmt_door_revoked(door_handle_t dh)
268 {
269 struct door_info info;
270 extern int sys_shutdown;
271
272 ASSERT(dh != NULL);
273
274 if (sys_shutdown) {
275 cmn_err(CE_NOTE, "dls_mgmt_door: shutdown observed\n");
276 return (B_TRUE);
277 }
278
279 if (door_ki_info(dh, &info) != 0)
280 return (B_TRUE);
281
282 return ((info.di_attributes & DOOR_REVOKED) != 0);
283 }
284
/*
 * Upcall to the datalink management daemon (dlmgmtd).
 *
 * arg/asize describe the request; rbuf/rsize describe the caller's reply
 * buffer.  Returns EBADF when there is no usable door, the door upcall's
 * error when the upcall itself fails, ENOSPC or EINVAL when the reply does
 * not fit or fill the caller's buffer exactly, and otherwise the lr_err
 * field of the reply (interpreted as a dlmgmt_retval_t).
 */
static int
i_dls_mgmt_upcall(void *arg, size_t asize, void *rbuf, size_t rsize)
{
	door_arg_t	darg, save_arg;
	door_handle_t	dh;
	int		err;
	/* counts upcall attempts; not reset when the door is re-acquired */
	int		retry = 0;

#define	MAXRETRYNUM	3

	ASSERT(arg);
	darg.data_ptr = arg;
	darg.data_size = asize;
	darg.desc_ptr = NULL;
	darg.desc_num = 0;
	darg.rbuf = rbuf;
	darg.rsize = rsize;
	/* keep a pristine copy so darg can be reset before a retry */
	save_arg = darg;

retry:
	/* Take our own hold on the current door handle under the lock. */
	mutex_enter(&i_dls_mgmt_lock);
	dh = dls_mgmt_dh;
	if ((dh == NULL) || i_dls_mgmt_door_revoked(dh)) {
		mutex_exit(&i_dls_mgmt_lock);
		return (EBADF);
	}
	door_ki_hold(dh);
	mutex_exit(&i_dls_mgmt_lock);

	for (;;) {
		retry++;
		if ((err = door_ki_upcall_limited(dh, &darg, zone_kcred(),
		    SIZE_MAX, 0)) == 0)
			break;

		/*
		 * handle door call errors
		 */
		darg = save_arg;
		switch (err) {
		case EINTR:
			/*
			 * If the operation which caused this door upcall gets
			 * interrupted, return directly.
			 */
			goto done;
		case EAGAIN:
			/*
			 * Repeat upcall if the maximum attempt limit has not
			 * been reached.
			 */
			if (retry < MAXRETRYNUM) {
				delay(2 * hz);
				break;
			}
			cmn_err(CE_WARN, "dls: dlmgmtd fatal error %d\n", err);
			goto done;
		default:
			/* A fatal door error */
			if (i_dls_mgmt_door_revoked(dh)) {
				cmn_err(CE_NOTE,
				    "dls: dlmgmtd door service revoked\n");

				/*
				 * Drop our hold and start over with whatever
				 * handle is installed now.
				 */
				if (retry < MAXRETRYNUM) {
					door_ki_rele(dh);
					goto retry;
				}
			}
			cmn_err(CE_WARN, "dls: dlmgmtd fatal error %d\n", err);
			goto done;
		}
	}

	if (darg.rbuf != rbuf) {
		/*
		 * The size of the input rbuf was not big enough, so the
		 * upcall allocated the rbuf itself.  If this happens, assume
		 * that this was an invalid door call request.
		 */
		kmem_free(darg.rbuf, darg.rsize);
		err = ENOSPC;
		goto done;
	}

	/* The reply must fill the caller's buffer exactly. */
	if (darg.rsize != rsize) {
		err = EINVAL;
		goto done;
	}

	err = ((dlmgmt_retval_t *)rbuf)->lr_err;

done:
	door_ki_rele(dh);
	return (err);
}
383
384 /*
385 * Request the datalink management daemon to create a link with the attributes
386 * below. Upon success, zero is returned and linkidp contains the linkid for
387 * the new link; otherwise, an errno is returned.
388 *
389 * - dev physical dev_t. required for all physical links,
390 * including GLDv3 links. It will be used to force the
391 * attachment of a physical device, hence the
392 * registration of its mac
393 * - class datalink class
394 * - media type media type; DL_OTHER means unknown
395 * - persist whether to persist the datalink
396 */
397 int
398 dls_mgmt_create(const char *devname, dev_t dev, datalink_class_t class,
399 uint32_t media, boolean_t persist, datalink_id_t *linkidp)
400 {
401 dlmgmt_upcall_arg_create_t create;
402 dlmgmt_create_retval_t retval;
403 int err;
404
405 create.ld_cmd = DLMGMT_CMD_DLS_CREATE;
406 create.ld_class = class;
407 create.ld_media = media;
408 create.ld_phymaj = getmajor(dev);
409 create.ld_phyinst = getminor(dev);
410 create.ld_persist = persist;
411 if (strlcpy(create.ld_devname, devname, sizeof (create.ld_devname)) >=
412 sizeof (create.ld_devname))
413 return (EINVAL);
414
415 if ((err = i_dls_mgmt_upcall(&create, sizeof (create), &retval,
416 sizeof (retval))) == 0) {
417 *linkidp = retval.lr_linkid;
418 }
419 return (err);
420 }
421
422 /*
423 * Request the datalink management daemon to destroy the specified link.
424 * Returns zero upon success, or an errno upon failure.
425 */
426 int
427 dls_mgmt_destroy(datalink_id_t linkid, boolean_t persist)
428 {
429 dlmgmt_upcall_arg_destroy_t destroy;
430 dlmgmt_destroy_retval_t retval;
431
432 destroy.ld_cmd = DLMGMT_CMD_DLS_DESTROY;
433 destroy.ld_linkid = linkid;
434 destroy.ld_persist = persist;
435
436 return (i_dls_mgmt_upcall(&destroy, sizeof (destroy),
437 &retval, sizeof (retval)));
438 }
439
440 /*
441 * Request the datalink management daemon to verify/update the information
442 * for a physical link. Upon success, get its linkid.
443 *
444 * - media type media type
445 * - novanity whether this physical datalink supports vanity naming.
446 * physical links that do not use the GLDv3 MAC plugin
 *			cannot support vanity naming
448 *
449 * This function could fail with ENOENT or EEXIST. Two cases return EEXIST:
450 *
451 * 1. A link with devname already exists, but the media type does not match.
 *    In this case, mediap will be set to the media type of the existing link.
453 * 2. A link with devname already exists, but its link name does not match
454 * the device name, although this link does not support vanity naming.
455 */
456 int
457 dls_mgmt_update(const char *devname, uint32_t media, boolean_t novanity,
458 uint32_t *mediap, datalink_id_t *linkidp)
459 {
460 dlmgmt_upcall_arg_update_t update;
461 dlmgmt_update_retval_t retval;
462 int err;
463
464 update.ld_cmd = DLMGMT_CMD_DLS_UPDATE;
465
466 if (strlcpy(update.ld_devname, devname, sizeof (update.ld_devname)) >=
467 sizeof (update.ld_devname))
468 return (EINVAL);
469
470 update.ld_media = media;
471 update.ld_novanity = novanity;
472
473 if ((err = i_dls_mgmt_upcall(&update, sizeof (update), &retval,
474 sizeof (retval))) == EEXIST) {
475 *linkidp = retval.lr_linkid;
476 *mediap = retval.lr_media;
477 } else if (err == 0) {
478 *linkidp = retval.lr_linkid;
479 }
480
481 return (err);
482 }
483
484 /*
485 * Request the datalink management daemon to get the information for a link.
486 * Returns zero upon success, or an errno upon failure.
487 *
488 * Only fills in information for argument pointers that are non-NULL.
489 * Note that the link argument is expected to be MAXLINKNAMELEN bytes.
490 */
491 int
492 dls_mgmt_get_linkinfo(datalink_id_t linkid, char *link,
493 datalink_class_t *classp, uint32_t *mediap, uint32_t *flagsp)
494 {
495 dlmgmt_door_getname_t getname;
496 dlmgmt_getname_retval_t retval;
497 int err, len;
498
499 getname.ld_cmd = DLMGMT_CMD_GETNAME;
500 getname.ld_linkid = linkid;
501
502 if ((err = i_dls_mgmt_upcall(&getname, sizeof (getname), &retval,
503 sizeof (retval))) != 0) {
504 return (err);
505 }
506
507 len = strlen(retval.lr_link);
508 if (len <= 1 || len >= MAXLINKNAMELEN)
509 return (EINVAL);
510
511 if (link != NULL)
512 (void) strlcpy(link, retval.lr_link, MAXLINKNAMELEN);
513 if (classp != NULL)
514 *classp = retval.lr_class;
515 if (mediap != NULL)
516 *mediap = retval.lr_media;
517 if (flagsp != NULL)
518 *flagsp = retval.lr_flags;
519 return (0);
520 }
521
522 /*
523 * Request the datalink management daemon to get the linkid for a link.
524 * Returns a non-zero error code on failure. The linkid argument is only
525 * set on success (when zero is returned.)
526 */
527 int
528 dls_mgmt_get_linkid(const char *link, datalink_id_t *linkid)
529 {
530 dlmgmt_door_getlinkid_t getlinkid;
531 dlmgmt_getlinkid_retval_t retval;
532 int err;
533
534 getlinkid.ld_cmd = DLMGMT_CMD_GETLINKID;
535 (void) strlcpy(getlinkid.ld_link, link, MAXLINKNAMELEN);
536 getlinkid.ld_zoneid = getzoneid();
537
538 if ((err = i_dls_mgmt_upcall(&getlinkid, sizeof (getlinkid), &retval,
539 sizeof (retval))) == 0) {
540 *linkid = retval.lr_linkid;
541 }
542 return (err);
543 }
544
545 datalink_id_t
546 dls_mgmt_get_next(datalink_id_t linkid, datalink_class_t class,
547 datalink_media_t dmedia, uint32_t flags)
548 {
549 dlmgmt_door_getnext_t getnext;
550 dlmgmt_getnext_retval_t retval;
551
552 getnext.ld_cmd = DLMGMT_CMD_GETNEXT;
553 getnext.ld_class = class;
554 getnext.ld_dmedia = dmedia;
555 getnext.ld_flags = flags;
556 getnext.ld_linkid = linkid;
557
558 if (i_dls_mgmt_upcall(&getnext, sizeof (getnext), &retval,
559 sizeof (retval)) != 0) {
560 return (DATALINK_INVALID_LINKID);
561 }
562
563 return (retval.lr_linkid);
564 }
565
566 static int
567 i_dls_mgmt_get_linkattr(const datalink_id_t linkid, const char *attr,
568 void *attrval, size_t *attrszp)
569 {
570 dlmgmt_upcall_arg_getattr_t getattr;
571 dlmgmt_getattr_retval_t retval;
572 int err;
573
574 getattr.ld_cmd = DLMGMT_CMD_DLS_GETATTR;
575 getattr.ld_linkid = linkid;
576 (void) strlcpy(getattr.ld_attr, attr, MAXLINKATTRLEN);
577
578 if ((err = i_dls_mgmt_upcall(&getattr, sizeof (getattr), &retval,
579 sizeof (retval))) == 0) {
580 if (*attrszp < retval.lr_attrsz)
581 return (EINVAL);
582 *attrszp = retval.lr_attrsz;
583 bcopy(retval.lr_attrval, attrval, retval.lr_attrsz);
584 }
585
586 return (err);
587 }
588
589 /*
590 * Note that this function can only get devp successfully for non-VLAN link.
591 */
592 int
593 dls_mgmt_get_phydev(datalink_id_t linkid, dev_t *devp)
594 {
595 uint64_t maj, inst;
596 size_t attrsz = sizeof (uint64_t);
597
598 if (i_dls_mgmt_get_linkattr(linkid, FPHYMAJ, &maj, &attrsz) != 0 ||
599 attrsz != sizeof (uint64_t) ||
600 i_dls_mgmt_get_linkattr(linkid, FPHYINST, &inst, &attrsz) != 0 ||
601 attrsz != sizeof (uint64_t)) {
602 return (EINVAL);
603 }
604
605 *devp = makedevice((major_t)maj, (minor_t)inst);
606 return (0);
607 }
608
609 /*
610 * Request the datalink management daemon to push in
611 * all properties associated with the link.
612 * Returns a non-zero error code on failure.
613 */
614 int
615 dls_mgmt_linkprop_init(datalink_id_t linkid)
616 {
617 dlmgmt_door_linkprop_init_t li;
618 dlmgmt_linkprop_init_retval_t retval;
619 int err;
620
621 li.ld_cmd = DLMGMT_CMD_LINKPROP_INIT;
622 li.ld_linkid = linkid;
623
624 err = i_dls_mgmt_upcall(&li, sizeof (li), &retval, sizeof (retval));
625 return (err);
626 }
627
628 static void
629 dls_devnet_prop_task(void *arg)
630 {
631 dls_devnet_t *ddp = arg;
632
633 (void) dls_mgmt_linkprop_init(ddp->dd_linkid);
634
635 mutex_enter(&ddp->dd_mutex);
636 ddp->dd_prop_loaded = B_TRUE;
637 ddp->dd_prop_taskid = NULL;
638 cv_broadcast(&ddp->dd_cv);
639 mutex_exit(&ddp->dd_mutex);
640 }
641
642 /*
643 * Ensure property loading task is completed.
644 */
645 void
646 dls_devnet_prop_task_wait(dls_dl_handle_t ddp)
647 {
648 mutex_enter(&ddp->dd_mutex);
649 while (ddp->dd_prop_taskid != NULL)
650 cv_wait(&ddp->dd_cv, &ddp->dd_mutex);
651 mutex_exit(&ddp->dd_mutex);
652 }
653
654 void
655 dls_devnet_rele_tmp(dls_dl_handle_t dlh)
656 {
657 dls_devnet_t *ddp = dlh;
658
659 mutex_enter(&ddp->dd_mutex);
660 ASSERT(ddp->dd_tref != 0);
661 if (--ddp->dd_tref == 0)
662 cv_signal(&ddp->dd_cv);
663 mutex_exit(&ddp->dd_mutex);
664 }
665
666 int
667 dls_devnet_hold_link(datalink_id_t linkid, dls_dl_handle_t *ddhp,
668 dls_link_t **dlpp)
669 {
670 dls_dl_handle_t dlh;
671 dls_link_t *dlp;
672 int err;
673
674 if ((err = dls_devnet_hold_tmp(linkid, &dlh)) != 0)
675 return (err);
676
677 if ((err = dls_link_hold(dls_devnet_mac(dlh), &dlp)) != 0) {
678 dls_devnet_rele_tmp(dlh);
679 return (err);
680 }
681
682 ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
683
684 *ddhp = dlh;
685 *dlpp = dlp;
686 return (0);
687 }
688
689 void
690 dls_devnet_rele_link(dls_dl_handle_t dlh, dls_link_t *dlp)
691 {
692 ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
693
694 dls_link_rele(dlp);
695 dls_devnet_rele_tmp(dlh);
696 }
697
698 /*
699 * "link" kstats related functions.
700 */
701
702 /*
703 * Query the "link" kstats.
704 *
705 * We may be called from the kstat subsystem in an arbitrary context.
706 * If the caller is the stack, the context could be an upcall data
707 * thread. Hence we can't acquire the mac perimeter in this function
708 * for fear of deadlock.
709 */
710 static int
711 dls_devnet_stat_update(kstat_t *ksp, int rw)
712 {
713 dls_devnet_t *ddp = ksp->ks_private;
714 dls_link_t *dlp;
715 int err;
716
717 /*
718 * Check the link is being renamed or if the link is going away
719 * before incrementing dd_tref which in turn prevents the link
720 * from being renamed or deleted until we finish.
721 */
722 mutex_enter(&ddp->dd_mutex);
723 if (ddp->dd_flags & (DD_CONDEMNED | DD_KSTAT_CHANGING)) {
724 mutex_exit(&ddp->dd_mutex);
725 return (ENOENT);
726 }
727 ddp->dd_tref++;
728 mutex_exit(&ddp->dd_mutex);
729
730 /*
731 * If a device detach happens at this time, it will block in
732 * dls_devnet_unset since the dd_tref has been bumped up above. So the
733 * access to 'dlp' is safe even though we don't hold the mac perimeter.
734 */
735 if (mod_hash_find(i_dls_link_hash, (mod_hash_key_t)ddp->dd_mac,
736 (mod_hash_val_t *)&dlp) != 0) {
737 dls_devnet_rele_tmp(ddp);
738 return (ENOENT);
739 }
740
741 err = dls_stat_update(ksp, dlp, rw);
742
743 dls_devnet_rele_tmp(ddp);
744 return (err);
745 }
746
747 /*
748 * Create the "link" kstats.
749 */
750 static void
751 dls_devnet_stat_create(dls_devnet_t *ddp, zoneid_t zoneid, zoneid_t newzoneid)
752 {
753 kstat_t *ksp;
754 char *nm;
755 char kname[MAXLINKNAMELEN];
756
757 if (zoneid != newzoneid) {
758 ASSERT(zoneid == GLOBAL_ZONEID);
759 (void) snprintf(kname, sizeof (kname), "z%d_%s", newzoneid,
760 ddp->dd_linkname);
761 nm = kname;
762 } else {
763 nm = ddp->dd_linkname;
764 }
765
766 if (dls_stat_create("link", 0, nm, zoneid,
767 dls_devnet_stat_update, ddp, &ksp, newzoneid) == 0) {
768 ASSERT(ksp != NULL);
769 if (zoneid == ddp->dd_owner_zid) {
770 ASSERT(ddp->dd_ksp == NULL);
771 ddp->dd_ksp = ksp;
772 } else {
773 ASSERT(ddp->dd_zone_ksp == NULL);
774 ddp->dd_zone_ksp = ksp;
775 }
776 }
777 }
778
779 /*
780 * Destroy the "link" kstats.
781 */
782 static void
783 dls_devnet_stat_destroy(dls_devnet_t *ddp, zoneid_t zoneid)
784 {
785 if (zoneid == ddp->dd_owner_zid) {
786 if (ddp->dd_ksp != NULL) {
787 dls_stat_delete(ddp->dd_ksp);
788 ddp->dd_ksp = NULL;
789 }
790 } else {
791 if (ddp->dd_zone_ksp != NULL) {
792 dls_stat_delete(ddp->dd_zone_ksp);
793 ddp->dd_zone_ksp = NULL;
794 }
795 }
796 }
797
798 /*
799 * The link has been renamed. Destroy the old non-legacy kstats ("link kstats")
800 * and create the new set using the new name.
801 */
802 static void
803 dls_devnet_stat_rename(dls_devnet_t *ddp, boolean_t zoneinit)
804 {
805 if (ddp->dd_ksp != NULL) {
806 dls_stat_delete(ddp->dd_ksp);
807 ddp->dd_ksp = NULL;
808 }
809 if (zoneinit && ddp->dd_zone_ksp != NULL) {
810 dls_stat_delete(ddp->dd_zone_ksp);
811 ddp->dd_zone_ksp = NULL;
812 }
813 /*
814 * We can't rename a link while it's assigned to a non-global zone
815 * unless we're first initializing the zone while readying it.
816 */
817 ASSERT(ddp->dd_zone_ksp == NULL);
818 dls_devnet_stat_create(ddp, ddp->dd_owner_zid,
819 (zoneinit ? ddp->dd_zid : ddp->dd_owner_zid));
820 if (zoneinit)
821 dls_devnet_stat_create(ddp, ddp->dd_zid, ddp->dd_zid);
822 }
823
824 /*
825 * Associate a linkid with a given link (identified by macname)
826 */
827 static int
828 dls_devnet_set(const char *macname, datalink_id_t linkid, zoneid_t zoneid,
829 dls_devnet_t **ddpp)
830 {
831 dls_devnet_t *ddp = NULL;
832 datalink_class_t class;
833 int err;
834 boolean_t stat_create = B_FALSE;
835 char linkname[MAXLINKNAMELEN];
836
837 rw_enter(&i_dls_devnet_lock, RW_WRITER);
838
839 /*
840 * Don't allow callers to set a link name with a linkid that already
841 * has a name association (that's what rename is for).
842 */
843 if (linkid != DATALINK_INVALID_LINKID) {
844 if (mod_hash_find(i_dls_devnet_id_hash,
845 (mod_hash_key_t)(uintptr_t)linkid,
846 (mod_hash_val_t *)&ddp) == 0) {
847 err = EEXIST;
848 goto done;
849 }
850 if ((err = dls_mgmt_get_linkinfo(linkid, linkname, &class,
851 NULL, NULL)) != 0)
852 goto done;
853 }
854
855 if ((err = mod_hash_find(i_dls_devnet_hash,
856 (mod_hash_key_t)macname, (mod_hash_val_t *)&ddp)) == 0) {
857 if (ddp->dd_linkid != DATALINK_INVALID_LINKID) {
858 err = EEXIST;
859 goto done;
860 }
861
862 /*
863 * This might be a physical link that has already
864 * been created, but which does not have a linkid
865 * because dlmgmtd was not running when it was created.
866 */
867 if (linkid == DATALINK_INVALID_LINKID ||
868 class != DATALINK_CLASS_PHYS) {
869 err = EINVAL;
870 goto done;
871 }
872 } else {
873 ddp = kmem_cache_alloc(i_dls_devnet_cachep, KM_SLEEP);
874 ddp->dd_tref = 0;
875 ddp->dd_ref++;
876 ddp->dd_owner_zid = zoneid;
877 (void) strlcpy(ddp->dd_mac, macname, sizeof (ddp->dd_mac));
878 VERIFY(mod_hash_insert(i_dls_devnet_hash,
879 (mod_hash_key_t)ddp->dd_mac, (mod_hash_val_t)ddp) == 0);
880 }
881
882 if (linkid != DATALINK_INVALID_LINKID) {
883 ddp->dd_linkid = linkid;
884 (void) strlcpy(ddp->dd_linkname, linkname,
885 sizeof (ddp->dd_linkname));
886 VERIFY(mod_hash_insert(i_dls_devnet_id_hash,
887 (mod_hash_key_t)(uintptr_t)linkid,
888 (mod_hash_val_t)ddp) == 0);
889 devnet_need_rebuild = B_TRUE;
890 stat_create = B_TRUE;
891 mutex_enter(&ddp->dd_mutex);
892 if (!ddp->dd_prop_loaded && (ddp->dd_prop_taskid == NULL)) {
893 ddp->dd_prop_taskid = taskq_dispatch(system_taskq,
894 dls_devnet_prop_task, ddp, TQ_SLEEP);
895 }
896 mutex_exit(&ddp->dd_mutex);
897 }
898 err = 0;
899 done:
900 /*
901 * It is safe to drop the i_dls_devnet_lock at this point. In the case
902 * of physical devices, the softmac framework will fail the device
903 * detach based on the smac_state or smac_hold_cnt. Other cases like
904 * vnic and aggr use their own scheme to serialize creates and deletes
905 * and ensure that *ddp is valid.
906 */
907 rw_exit(&i_dls_devnet_lock);
908 if (err == 0) {
909 if (zoneid != GLOBAL_ZONEID &&
910 (err = i_dls_devnet_setzid(ddp, zoneid, B_FALSE,
911 B_FALSE)) != 0)
912 (void) dls_devnet_unset(macname, &linkid, B_TRUE);
913 /*
914 * The kstat subsystem holds its own locks (rather perimeter)
915 * before calling the ks_update (dls_devnet_stat_update) entry
916 * point which in turn grabs the i_dls_devnet_lock. So the
917 * lock hierarchy is kstat locks -> i_dls_devnet_lock.
918 */
919 if (stat_create)
920 dls_devnet_stat_create(ddp, zoneid, zoneid);
921 if (ddpp != NULL)
922 *ddpp = ddp;
923 }
924 return (err);
925 }
926
/*
 * Disassociate a linkid with a given link (identified by macname)
 * This waits until temporary references to the dls_devnet_t are gone.
 *
 *	- macname	MAC name used to look up the dls_devnet_t
 *	- id		set to the dls_devnet_t's linkid before it is removed
 *	- wait		B_TRUE to block until temporary references and the
 *			property loading task are gone; B_FALSE to fail with
 *			EBUSY instead
 *
 * Returns zero on success, ENOENT if macname is unknown, or EBUSY if the
 * dls_devnet_t is still in use.  On success the dls_devnet_t has been
 * returned to the kmem cache and must not be used by the caller.
 */
static int
dls_devnet_unset(const char *macname, datalink_id_t *id, boolean_t wait)
{
	dls_devnet_t	*ddp;
	int		err;
	mod_hash_val_t	val;

	rw_enter(&i_dls_devnet_lock, RW_WRITER);
	if ((err = mod_hash_find(i_dls_devnet_hash,
	    (mod_hash_key_t)macname, (mod_hash_val_t *)&ddp)) != 0) {
		ASSERT(err == MH_ERR_NOTFOUND);
		rw_exit(&i_dls_devnet_lock);
		return (ENOENT);
	}

	mutex_enter(&ddp->dd_mutex);

	/*
	 * Make sure downcalls into softmac_create or softmac_destroy from
	 * devfs don't cv_wait on any devfs related condition for fear of
	 * deadlock. Return EBUSY if the asynchronous thread started for
	 * property loading as part of the post attach hasn't yet completed.
	 */
	ASSERT(ddp->dd_ref != 0);
	if ((ddp->dd_ref != 1) || (!wait &&
	    (ddp->dd_tref != 0 || ddp->dd_prop_taskid != NULL))) {
		int zstatus = 0;

		/*
		 * There are a couple of alternatives that might be going on
		 * here; a) the zone is shutting down and it has a transient
		 * link assigned, in which case we want to clean it up instead
		 * of moving it back to the global zone, or b) it's possible
		 * that we're trying to clean up an orphaned vnic that was
		 * delegated to a zone and which wasn't cleaned up properly
		 * when the zone went away.  Check for either of these cases
		 * before we simply return EBUSY.
		 *
		 * zstatus indicates which situation we are dealing with:
		 *	 0 - means return EBUSY
		 *	 1 - means case (a), cleanup transient link
		 *	-1 - means case (b), orphaned VNIC
		 */
		if (ddp->dd_ref > 1 && ddp->dd_zid != GLOBAL_ZONEID) {
			zone_t	*zp;

			if ((zp = zone_find_by_id(ddp->dd_zid)) == NULL) {
				zstatus = -1;
			} else {
				if (ddp->dd_transient) {
					zone_status_t s = zone_status_get(zp);

					if (s >= ZONE_IS_SHUTTING_DOWN)
						zstatus = 1;
				}
				zone_rele(zp);
			}
		}

		if (zstatus == 0) {
			mutex_exit(&ddp->dd_mutex);
			rw_exit(&i_dls_devnet_lock);
			return (EBUSY);
		}

		/*
		 * We want to delete the link, reset ref to 1;
		 */
		if (zstatus == -1)
			/* Log a warning, but continue in this case */
			cmn_err(CE_WARN, "clear orphaned datalink: %s\n",
			    ddp->dd_linkname);
		ddp->dd_ref = 1;
	}

	/*
	 * From here on the dls_devnet_t is going away: mark it so that new
	 * holds and kstat updates fail, drop the last long-term reference
	 * and report the linkid to the caller.
	 */
	ddp->dd_flags |= DD_CONDEMNED;
	ddp->dd_ref--;
	*id = ddp->dd_linkid;

	if (ddp->dd_zid != GLOBAL_ZONEID) {
		/*
		 * We need to release the dd_mutex before we try and destroy the
		 * stat. When we destroy it, we'll need to grab the lock for the
		 * kstat but if there's a concurrent reader of the kstat, we'll
		 * be blocked on it. This will lead to deadlock because these
		 * kstats employ a ks_update function (dls_devnet_stat_update)
		 * which needs the dd_mutex that we currently hold.
		 *
		 * Because we've already flagged the dls_devnet_t as
		 * DD_CONDEMNED and we still have a write lock on
		 * i_dls_devnet_lock, we should be able to release the dd_mutex.
		 */
		mutex_exit(&ddp->dd_mutex);
		dls_devnet_stat_destroy(ddp, ddp->dd_zid);
		mutex_enter(&ddp->dd_mutex);
		(void) i_dls_devnet_setzid(ddp, GLOBAL_ZONEID, B_FALSE,
		    B_FALSE);
	}

	/*
	 * Remove this dls_devnet_t from the hash table.
	 */
	VERIFY(mod_hash_remove(i_dls_devnet_hash,
	    (mod_hash_key_t)ddp->dd_mac, &val) == 0);

	if (ddp->dd_linkid != DATALINK_INVALID_LINKID) {
		VERIFY(mod_hash_remove(i_dls_devnet_id_hash,
		    (mod_hash_key_t)(uintptr_t)ddp->dd_linkid, &val) == 0);

		devnet_need_rebuild = B_TRUE;
	}
	rw_exit(&i_dls_devnet_lock);

	if (wait) {
		/*
		 * Wait until all temporary references are released.
		 */
		while ((ddp->dd_tref != 0) || (ddp->dd_prop_taskid != NULL))
			cv_wait(&ddp->dd_cv, &ddp->dd_mutex);
	} else {
		ASSERT(ddp->dd_tref == 0 && ddp->dd_prop_taskid == NULL);
	}

	if (ddp->dd_linkid != DATALINK_INVALID_LINKID) {
		/*
		 * See the earlier call in this function for an explanation.
		 */
		mutex_exit(&ddp->dd_mutex);
		dls_devnet_stat_destroy(ddp, ddp->dd_owner_zid);
		mutex_enter(&ddp->dd_mutex);
	}


	/* Reset per-link state before returning the object to the cache. */
	ddp->dd_prop_loaded = B_FALSE;
	ddp->dd_linkid = DATALINK_INVALID_LINKID;
	ddp->dd_flags = 0;
	mutex_exit(&ddp->dd_mutex);
	kmem_cache_free(i_dls_devnet_cachep, ddp);

	return (0);
}
1072
1073 static int
1074 dls_devnet_hold_common(datalink_id_t linkid, dls_devnet_t **ddpp,
1075 boolean_t tmp_hold)
1076 {
1077 dls_devnet_t *ddp;
1078 dev_t phydev = 0;
1079 dls_dev_handle_t ddh = NULL;
1080 int err;
1081
1082 /*
1083 * Hold this link to prevent it being detached in case of a
1084 * physical link.
1085 */
1086 if (dls_mgmt_get_phydev(linkid, &phydev) == 0)
1087 (void) softmac_hold_device(phydev, &ddh);
1088
1089 rw_enter(&i_dls_devnet_lock, RW_WRITER);
1090 if ((err = mod_hash_find(i_dls_devnet_id_hash,
1091 (mod_hash_key_t)(uintptr_t)linkid, (mod_hash_val_t *)&ddp)) != 0) {
1092 ASSERT(err == MH_ERR_NOTFOUND);
1093 rw_exit(&i_dls_devnet_lock);
1094 softmac_rele_device(ddh);
1095 return (ENOENT);
1096 }
1097
1098 mutex_enter(&ddp->dd_mutex);
1099 ASSERT(ddp->dd_ref > 0);
1100 if (ddp->dd_flags & DD_CONDEMNED) {
1101 mutex_exit(&ddp->dd_mutex);
1102 rw_exit(&i_dls_devnet_lock);
1103 softmac_rele_device(ddh);
1104 return (ENOENT);
1105 }
1106 if (tmp_hold)
1107 ddp->dd_tref++;
1108 else
1109 ddp->dd_ref++;
1110 mutex_exit(&ddp->dd_mutex);
1111 rw_exit(&i_dls_devnet_lock);
1112
1113 softmac_rele_device(ddh);
1114
1115 *ddpp = ddp;
1116 return (0);
1117 }
1118
1119 int
1120 dls_devnet_hold(datalink_id_t linkid, dls_devnet_t **ddpp)
1121 {
1122 return (dls_devnet_hold_common(linkid, ddpp, B_FALSE));
1123 }
1124
1125 /*
1126 * Hold the vanity naming structure (dls_devnet_t) temporarily. The request to
1127 * delete the dls_devnet_t will wait until the temporary reference is released.
1128 */
1129 int
1130 dls_devnet_hold_tmp(datalink_id_t linkid, dls_devnet_t **ddpp)
1131 {
1132 return (dls_devnet_hold_common(linkid, ddpp, B_TRUE));
1133 }
1134
1135 /*
1136 * This funtion is called when a DLS client tries to open a device node.
1137 * This dev_t could a result of a /dev/net node access (returned by
1138 * devnet_create_rvp->dls_devnet_open()) or a direct /dev node access.
1139 * In both cases, this function bumps up the reference count of the
1140 * dls_devnet_t structure. The reference is held as long as the device node
1141 * is open. In the case of /dev/net while it is true that the initial reference
1142 * is held when the devnet_create_rvp->dls_devnet_open call happens, this
1143 * initial reference is released immediately in devnet_inactive_callback ->
1144 * dls_devnet_close(). (Note that devnet_inactive_callback() is called right
1145 * after dld_open completes, not when the /dev/net node is being closed).
1146 * To undo this function, call dls_devnet_rele()
1147 */
1148 int
1149 dls_devnet_hold_by_dev(dev_t dev, dls_dl_handle_t *ddhp)
1150 {
1151 char name[MAXNAMELEN];
1152 char *drv;
1153 dls_dev_handle_t ddh = NULL;
1154 dls_devnet_t *ddp;
1155 int err;
1156
1157 if ((drv = ddi_major_to_name(getmajor(dev))) == NULL)
1158 return (EINVAL);
1159
1160 (void) snprintf(name, sizeof (name), "%s%d", drv,
1161 DLS_MINOR2INST(getminor(dev)));
1162
1163 /*
1164 * Hold this link to prevent it being detached in case of a
1165 * GLDv3 physical link.
1166 */
1167 if (DLS_MINOR2INST(getminor(dev)) <= DLS_MAX_PPA)
1168 (void) softmac_hold_device(dev, &ddh);
1169
1170 rw_enter(&i_dls_devnet_lock, RW_WRITER);
1171 if ((err = mod_hash_find(i_dls_devnet_hash,
1172 (mod_hash_key_t)name, (mod_hash_val_t *)&ddp)) != 0) {
1173 ASSERT(err == MH_ERR_NOTFOUND);
1174 rw_exit(&i_dls_devnet_lock);
1175 softmac_rele_device(ddh);
1176 return (ENOENT);
1177 }
1178 mutex_enter(&ddp->dd_mutex);
1179 ASSERT(ddp->dd_ref > 0);
1180 if (ddp->dd_flags & DD_CONDEMNED) {
1181 mutex_exit(&ddp->dd_mutex);
1182 rw_exit(&i_dls_devnet_lock);
1183 softmac_rele_device(ddh);
1184 return (ENOENT);
1185 }
1186 ddp->dd_ref++;
1187 mutex_exit(&ddp->dd_mutex);
1188 rw_exit(&i_dls_devnet_lock);
1189
1190 softmac_rele_device(ddh);
1191
1192 *ddhp = ddp;
1193 return (0);
1194 }
1195
1196 void
1197 dls_devnet_rele(dls_devnet_t *ddp)
1198 {
1199 mutex_enter(&ddp->dd_mutex);
1200 ASSERT(ddp->dd_ref > 1);
1201 ddp->dd_ref--;
1202 if ((ddp->dd_flags & DD_IMPLICIT_IPTUN) && ddp->dd_ref == 1) {
1203 mutex_exit(&ddp->dd_mutex);
1204 if (i_dls_devnet_destroy_iptun(ddp->dd_linkid) != 0)
1205 ddp->dd_flags |= DD_IMPLICIT_IPTUN;
1206 return;
1207 }
1208 mutex_exit(&ddp->dd_mutex);
1209 }
1210
1211 static int
1212 dls_devnet_hold_by_name(const char *link, dls_devnet_t **ddpp)
1213 {
1214 char drv[MAXLINKNAMELEN];
1215 uint_t ppa;
1216 major_t major;
1217 dev_t phy_dev, tmp_dev;
1218 datalink_id_t linkid;
1219 dls_dev_handle_t ddh;
1220 int err;
1221
1222 if ((err = dls_mgmt_get_linkid(link, &linkid)) == 0)
1223 return (dls_devnet_hold(linkid, ddpp));
1224
1225 /*
1226 * If we failed to get the link's linkid because the dlmgmtd daemon
1227 * has not been started, return ENOENT so that the application can
1228 * fallback to open the /dev node.
1229 */
1230 if (err == EBADF)
1231 return (ENOENT);
1232
1233 if (err != ENOENT)
1234 return (err);
1235
1236 if (ddi_parse(link, drv, &ppa) != DDI_SUCCESS)
1237 return (ENOENT);
1238
1239 if (IS_IPTUN_LINK(drv)) {
1240 if ((err = i_dls_devnet_create_iptun(link, drv, &linkid)) != 0)
1241 return (err);
1242 /*
1243 * At this point, an IP tunnel MAC has registered, which
1244 * resulted in a link being created.
1245 */
1246 err = dls_devnet_hold(linkid, ddpp);
1247 ASSERT(err == 0);
1248 if (err != 0) {
1249 VERIFY(i_dls_devnet_destroy_iptun(linkid) == 0);
1250 return (err);
1251 }
1252 /*
1253 * dls_devnet_rele() will know to destroy the implicit IP
1254 * tunnel on last reference release if DD_IMPLICIT_IPTUN is
1255 * set.
1256 */
1257 (*ddpp)->dd_flags |= DD_IMPLICIT_IPTUN;
1258 return (0);
1259 }
1260
1261 /*
1262 * If this link:
1263 * (a) is a physical device, (b) this is the first boot, (c) the MAC
1264 * is not registered yet, and (d) we cannot find its linkid, then the
1265 * linkname is the same as the devname.
1266 *
1267 * First filter out invalid names.
1268 */
1269 if ((major = ddi_name_to_major(drv)) == (major_t)-1)
1270 return (ENOENT);
1271
1272 phy_dev = makedevice(major, DLS_PPA2MINOR(ppa));
1273 if (softmac_hold_device(phy_dev, &ddh) != 0)
1274 return (ENOENT);
1275
1276 /*
1277 * At this time, the MAC should be registered, check its phy_dev using
1278 * the given name.
1279 */
1280 if ((err = dls_mgmt_get_linkid(link, &linkid)) != 0 ||
1281 (err = dls_mgmt_get_phydev(linkid, &tmp_dev)) != 0) {
1282 softmac_rele_device(ddh);
1283 return (err);
1284 }
1285 if (tmp_dev != phy_dev) {
1286 softmac_rele_device(ddh);
1287 return (ENOENT);
1288 }
1289
1290 err = dls_devnet_hold(linkid, ddpp);
1291 softmac_rele_device(ddh);
1292 return (err);
1293 }
1294
1295 int
1296 dls_devnet_macname2linkid(const char *macname, datalink_id_t *linkidp)
1297 {
1298 dls_devnet_t *ddp;
1299
1300 rw_enter(&i_dls_devnet_lock, RW_READER);
1301 if (mod_hash_find(i_dls_devnet_hash, (mod_hash_key_t)macname,
1302 (mod_hash_val_t *)&ddp) != 0) {
1303 rw_exit(&i_dls_devnet_lock);
1304 return (ENOENT);
1305 }
1306
1307 *linkidp = ddp->dd_linkid;
1308 rw_exit(&i_dls_devnet_lock);
1309 return (0);
1310 }
1311
1312 /*
1313 * Get linkid for the given dev.
1314 */
1315 int
1316 dls_devnet_dev2linkid(dev_t dev, datalink_id_t *linkidp)
1317 {
1318 char macname[MAXNAMELEN];
1319 char *drv;
1320
1321 if ((drv = ddi_major_to_name(getmajor(dev))) == NULL)
1322 return (EINVAL);
1323
1324 (void) snprintf(macname, sizeof (macname), "%s%d", drv,
1325 DLS_MINOR2INST(getminor(dev)));
1326 return (dls_devnet_macname2linkid(macname, linkidp));
1327 }
1328
1329 /*
1330 * Get the link's physical dev_t. It this is a VLAN, get the dev_t of the
1331 * link this VLAN is created on.
1332 */
1333 int
1334 dls_devnet_phydev(datalink_id_t vlanid, dev_t *devp)
1335 {
1336 dls_devnet_t *ddp;
1337 int err;
1338
1339 if ((err = dls_devnet_hold_tmp(vlanid, &ddp)) != 0)
1340 return (err);
1341
1342 err = dls_mgmt_get_phydev(ddp->dd_linkid, devp);
1343 dls_devnet_rele_tmp(ddp);
1344 return (err);
1345 }
1346
/*
 * Handle the renaming requests.  There are two rename cases:
 *
 * 1. Request to rename a valid link (id1) to a non-existent link name
 *    (id2).  In this case id2 is DATALINK_INVALID_LINKID.  Just check whether
 *    id1 is held by any applications.
 *
 *    In this case, the link's kstats need to be updated using the given name.
 *
 * 2. Request to rename a valid link (id1) to the name of a REMOVED
 *    physical link (id2).  In this case, check that id1 and its associated
 *    mac is not held by any application, and update the link's linkid to id2.
 *
 *    This case does not change the <link name, linkid> mapping, so the link's
 *    kstats need to be updated using the name associated with the given id2.
 *
 * The zoneinit parameter is used to allow us to create a VNIC in the global
 * zone which is assigned to a non-global zone.  Since there is a race
 * condition in the create process if two VNICs have the same name, we need to
 * rename it after it has been assigned to the zone.  When zoneinit is set the
 * "link is busy" check is skipped and, in the first case, the primary MAC
 * client is renamed to "z<dd_zid>_<link>" rather than to <link>.
 *
 * Returns 0 on success.  Errors produced directly by this function are
 * EEXIST (id2 is not a removed physical link, or id2 is already present in
 * the id hash), ENOENT (id1 is unknown), EBUSY (id1 has extra references and
 * zoneinit is not set) and EINVAL (second case but id1 is not a held physical
 * device); errors from the mac and dlmgmtd calls are passed through.
 */
int
dls_devnet_rename(datalink_id_t id1, datalink_id_t id2, const char *link,
    boolean_t zoneinit)
{
	dls_dev_handle_t	ddh = NULL;
	int			err = 0;
	dev_t			phydev = 0;
	dls_devnet_t		*ddp;
	mac_perim_handle_t	mph = NULL;
	mac_handle_t		mh;
	mod_hash_val_t		val;
	boolean_t		clear_dd_flag = B_FALSE;

	/*
	 * In the second case, id2 must be a REMOVED physical link.  If its
	 * device can still be held, it is not removed.
	 */
	if ((id2 != DATALINK_INVALID_LINKID) &&
	    (dls_mgmt_get_phydev(id2, &phydev) == 0) &&
	    softmac_hold_device(phydev, &ddh) == 0) {
		softmac_rele_device(ddh);
		return (EEXIST);
	}

	/*
	 * Hold id1 to prevent it from being detached (if a physical link).
	 */
	if (dls_mgmt_get_phydev(id1, &phydev) == 0)
		(void) softmac_hold_device(phydev, &ddh);

	/*
	 * The framework does not hold locks across calls to the
	 * mac perimeter, hence enter the perimeter first.  This also waits
	 * for the property loading to finish.
	 */
	if ((err = mac_perim_enter_by_linkid(id1, &mph)) != 0) {
		softmac_rele_device(ddh);
		return (err);
	}

	rw_enter(&i_dls_devnet_lock, RW_WRITER);
	if ((err = mod_hash_find(i_dls_devnet_id_hash,
	    (mod_hash_key_t)(uintptr_t)id1, (mod_hash_val_t *)&ddp)) != 0) {
		ASSERT(err == MH_ERR_NOTFOUND);
		err = ENOENT;
		goto done;
	}

	/*
	 * Return EBUSY if any applications have this link open, if any thread
	 * is currently accessing the link kstats, or if the link is on-loan
	 * to a non-global zone.  Then set the DD_KSTAT_CHANGING flag to
	 * prevent any access to the kstats while we delete and recreate
	 * kstats below.  However, we skip this check if we're renaming the
	 * vnic as part of bringing it up for a zone.
	 */
	mutex_enter(&ddp->dd_mutex);
	if (!zoneinit) {
		if (ddp->dd_ref > 1) {
			mutex_exit(&ddp->dd_mutex);
			err = EBUSY;
			goto done;
		}
	}

	ddp->dd_flags |= DD_KSTAT_CHANGING;
	/* Remember to clear DD_KSTAT_CHANGING again on the way out. */
	clear_dd_flag = B_TRUE;
	mutex_exit(&ddp->dd_mutex);

	/* The first case: only the link name changes. */
	if (id2 == DATALINK_INVALID_LINKID) {
		(void) strlcpy(ddp->dd_linkname, link,
		    sizeof (ddp->dd_linkname));

		/* rename mac client name and its flow if exists */
		if ((err = mac_open(ddp->dd_mac, &mh)) != 0)
			goto done;
		if (zoneinit) {
			char tname[MAXLINKNAMELEN];

			(void) snprintf(tname, sizeof (tname), "z%d_%s",
			    ddp->dd_zid, link);
			(void) mac_rename_primary(mh, tname);
		} else {
			(void) mac_rename_primary(mh, link);
		}
		mac_close(mh);
		goto done;
	}

	/*
	 * The second case, check whether the MAC is used by any MAC
	 * user.  This must be a physical link so ddh must not be NULL.
	 */
	if (ddh == NULL) {
		err = EINVAL;
		goto done;
	}

	if ((err = mac_open(ddp->dd_mac, &mh)) != 0)
		goto done;

	/*
	 * We release the reference of the MAC which mac_open() is
	 * holding.  Note that this mac will not be unregistered
	 * because the physical device is held.
	 */
	mac_close(mh);

	/*
	 * Check if there is any other MAC clients, if not, hold this mac
	 * exclusively until we are done.
	 */
	if ((err = mac_mark_exclusive(mh)) != 0)
		goto done;

	/*
	 * Update the link's linkid.  id2 must not already be present in the
	 * id hash.
	 */
	if ((err = mod_hash_find(i_dls_devnet_id_hash,
	    (mod_hash_key_t)(uintptr_t)id2, &val)) != MH_ERR_NOTFOUND) {
		mac_unmark_exclusive(mh);
		err = EEXIST;
		goto done;
	}

	/* Fetch the link name that dlmgmtd associates with id2. */
	err = dls_mgmt_get_linkinfo(id2, ddp->dd_linkname, NULL, NULL, NULL);
	if (err != 0) {
		mac_unmark_exclusive(mh);
		goto done;
	}

	/* Re-key the entry in the id hash from id1 to id2. */
	(void) mod_hash_remove(i_dls_devnet_id_hash,
	    (mod_hash_key_t)(uintptr_t)id1, &val);

	ddp->dd_linkid = id2;
	(void) mod_hash_insert(i_dls_devnet_id_hash,
	    (mod_hash_key_t)(uintptr_t)ddp->dd_linkid, (mod_hash_val_t)ddp);

	mac_unmark_exclusive(mh);

	/* load properties for new id */
	mutex_enter(&ddp->dd_mutex);
	ddp->dd_prop_loaded = B_FALSE;
	ddp->dd_prop_taskid = taskq_dispatch(system_taskq,
	    dls_devnet_prop_task, ddp, TQ_SLEEP);
	mutex_exit(&ddp->dd_mutex);

done:
	/*
	 * Change the name of the kstat based on the new link name.
	 * We can't hold the i_dls_devnet_lock across calls to the kstat
	 * subsystem.  Instead the DD_KSTAT_CHANGING flag set above in this
	 * function prevents any access to the dd_ksp while we delete and
	 * recreate it below.
	 */
	rw_exit(&i_dls_devnet_lock);
	if (err == 0)
		dls_devnet_stat_rename(ddp, zoneinit);

	if (clear_dd_flag) {
		mutex_enter(&ddp->dd_mutex);
		ddp->dd_flags &= ~DD_KSTAT_CHANGING;
		mutex_exit(&ddp->dd_mutex);
	}

	if (mph != NULL)
		mac_perim_exit(mph);
	softmac_rele_device(ddh);
	return (err);
}
1537
1538 static int
1539 i_dls_devnet_setzid(dls_devnet_t *ddp, zoneid_t new_zoneid, boolean_t setprop,
1540 boolean_t transient)
1541 {
1542 int err;
1543 mac_perim_handle_t mph;
1544 boolean_t upcall_done = B_FALSE;
1545 datalink_id_t linkid = ddp->dd_linkid;
1546 zoneid_t old_zoneid = ddp->dd_zid;
1547 dlmgmt_door_setzoneid_t setzid;
1548 dlmgmt_setzoneid_retval_t retval;
1549
1550 if (old_zoneid == new_zoneid)
1551 return (0);
1552
1553 if ((err = mac_perim_enter_by_macname(ddp->dd_mac, &mph)) != 0)
1554 return (err);
1555
1556 /*
1557 * When changing the zoneid of an existing link, we need to tell
1558 * dlmgmtd about it. dlmgmtd already knows the zoneid associated with
1559 * newly created links.
1560 */
1561 if (setprop) {
1562 setzid.ld_cmd = DLMGMT_CMD_SETZONEID;
1563 setzid.ld_linkid = linkid;
1564 setzid.ld_zoneid = new_zoneid;
1565 err = i_dls_mgmt_upcall(&setzid, sizeof (setzid), &retval,
1566 sizeof (retval));
1567 if (err != 0)
1568 goto done;
1569 upcall_done = B_TRUE;
1570 }
1571 if ((err = dls_link_setzid(ddp->dd_mac, new_zoneid)) == 0) {
1572 ddp->dd_zid = new_zoneid;
1573 ddp->dd_transient = transient;
1574 devnet_need_rebuild = B_TRUE;
1575 }
1576
1577 done:
1578 if (err != 0 && upcall_done) {
1579 setzid.ld_zoneid = old_zoneid;
1580 (void) i_dls_mgmt_upcall(&setzid, sizeof (setzid), &retval,
1581 sizeof (retval));
1582 }
1583 mac_perim_exit(mph);
1584 return (err);
1585 }
1586
1587 int
1588 dls_devnet_setzid(dls_dl_handle_t ddh, zoneid_t new_zid, boolean_t transient)
1589 {
1590 dls_devnet_t *ddp;
1591 int err;
1592 zoneid_t old_zid;
1593 boolean_t refheld = B_FALSE;
1594
1595 old_zid = ddh->dd_zid;
1596
1597 if (old_zid == new_zid)
1598 return (0);
1599
1600 /*
1601 * Acquire an additional reference to the link if it is being assigned
1602 * to a non-global zone from the global zone.
1603 */
1604 if (old_zid == GLOBAL_ZONEID && new_zid != GLOBAL_ZONEID) {
1605 if ((err = dls_devnet_hold(ddh->dd_linkid, &ddp)) != 0)
1606 return (err);
1607 refheld = B_TRUE;
1608 }
1609
1610 if ((err = i_dls_devnet_setzid(ddh, new_zid, B_TRUE, transient)) != 0) {
1611 if (refheld)
1612 dls_devnet_rele(ddp);
1613 return (err);
1614 }
1615
1616 /*
1617 * Release the additional reference if the link is returning to the
1618 * global zone from a non-global zone.
1619 */
1620 if (old_zid != GLOBAL_ZONEID && new_zid == GLOBAL_ZONEID)
1621 dls_devnet_rele(ddh);
1622
1623 /* Re-create kstats in the appropriate zones. */
1624 if (old_zid != GLOBAL_ZONEID)
1625 dls_devnet_stat_destroy(ddh, old_zid);
1626 if (new_zid != GLOBAL_ZONEID)
1627 dls_devnet_stat_create(ddh, new_zid, new_zid);
1628
1629 return (0);
1630 }
1631
1632 zoneid_t
1633 dls_devnet_getzid(dls_dl_handle_t ddh)
1634 {
1635 return (((dls_devnet_t *)ddh)->dd_zid);
1636 }
1637
1638 zoneid_t
1639 dls_devnet_getownerzid(dls_dl_handle_t ddh)
1640 {
1641 return (((dls_devnet_t *)ddh)->dd_owner_zid);
1642 }
1643
1644 /*
1645 * Is linkid visible from zoneid? A link is visible if it was created in the
1646 * zone, or if it is currently assigned to the zone.
1647 */
1648 boolean_t
1649 dls_devnet_islinkvisible(datalink_id_t linkid, zoneid_t zoneid)
1650 {
1651 dls_devnet_t *ddp;
1652 boolean_t result;
1653
1654 if (dls_devnet_hold_tmp(linkid, &ddp) != 0)
1655 return (B_FALSE);
1656 result = (ddp->dd_owner_zid == zoneid || ddp->dd_zid == zoneid);
1657 dls_devnet_rele_tmp(ddp);
1658 return (result);
1659 }
1660
1661 /*
1662 * Access a vanity naming node.
1663 */
1664 int
1665 dls_devnet_open(const char *link, dls_dl_handle_t *dhp, dev_t *devp)
1666 {
1667 dls_devnet_t *ddp;
1668 dls_link_t *dlp;
1669 zoneid_t zid = getzoneid();
1670 int err;
1671 mac_perim_handle_t mph;
1672
1673 if ((err = dls_devnet_hold_by_name(link, &ddp)) != 0)
1674 return (err);
1675
1676 dls_devnet_prop_task_wait(ddp);
1677
1678 /*
1679 * Opening a link that does not belong to the current non-global zone
1680 * is not allowed.
1681 */
1682 if (zid != GLOBAL_ZONEID && ddp->dd_zid != zid) {
1683 dls_devnet_rele(ddp);
1684 return (ENOENT);
1685 }
1686
1687 err = mac_perim_enter_by_macname(ddp->dd_mac, &mph);
1688 if (err != 0) {
1689 dls_devnet_rele(ddp);
1690 return (err);
1691 }
1692
1693 err = dls_link_hold_create(ddp->dd_mac, &dlp);
1694 mac_perim_exit(mph);
1695
1696 if (err != 0) {
1697 dls_devnet_rele(ddp);
1698 return (err);
1699 }
1700
1701 *dhp = ddp;
1702 *devp = dls_link_dev(dlp);
1703 return (0);
1704 }
1705
/*
 * Close access to a vanity naming node.
 *
 * Undoes dls_devnet_open(): inside the mac perimeter, the dls_link_t hold
 * placed at open time is dropped, and afterwards the dls_devnet_t reference
 * is released with dls_devnet_rele().  Failure to enter the perimeter or to
 * look up the dls_link_t trips a VERIFY().
 */
void
dls_devnet_close(dls_dl_handle_t dlh)
{
	dls_devnet_t		*ddp = dlh;
	dls_link_t		*dlp;
	mac_perim_handle_t	mph;

	VERIFY(mac_perim_enter_by_macname(ddp->dd_mac, &mph) == 0);
	/* Taking a hold is how we obtain the dls_link_t pointer to release. */
	VERIFY(dls_link_hold(ddp->dd_mac, &dlp) == 0);

	/*
	 * One rele for the hold placed in dls_devnet_open, another for
	 * the hold done just above
	 */
	dls_link_rele(dlp);
	dls_link_rele(dlp);
	mac_perim_exit(mph);

	dls_devnet_rele(ddp);
}
1729
1730 /*
1731 * This is used by /dev/net to rebuild the nodes for readdir(). It is not
1732 * critical and no protection is needed.
1733 */
1734 boolean_t
1735 dls_devnet_rebuild()
1736 {
1737 boolean_t updated = devnet_need_rebuild;
1738
1739 devnet_need_rebuild = B_FALSE;
1740 return (updated);
1741 }
1742
1743 int
1744 dls_devnet_create(mac_handle_t mh, datalink_id_t linkid, zoneid_t zoneid)
1745 {
1746 dls_link_t *dlp;
1747 dls_devnet_t *ddp;
1748 int err;
1749 mac_perim_handle_t mph;
1750
1751 /*
1752 * Holding the mac perimeter ensures that the downcall from the
1753 * dlmgmt daemon which does the property loading does not proceed
1754 * until we relinquish the perimeter.
1755 */
1756 mac_perim_enter_by_mh(mh, &mph);
1757 /*
1758 * Make this association before we call dls_link_hold_create as
1759 * we need to use the linkid to get the user name for the link
1760 * when we create the MAC client.
1761 */
1762 if ((err = dls_devnet_set(mac_name(mh), linkid, zoneid, &ddp)) == 0) {
1763 if ((err = dls_link_hold_create(mac_name(mh), &dlp)) != 0) {
1764 mac_perim_exit(mph);
1765 (void) dls_devnet_unset(mac_name(mh), &linkid, B_TRUE);
1766 return (err);
1767 }
1768 }
1769 mac_perim_exit(mph);
1770 return (err);
1771 }
1772
1773 /*
1774 * Set the linkid of the dls_devnet_t and add it into the i_dls_devnet_id_hash.
1775 * This is called in the case that the dlmgmtd daemon is started later than
1776 * the physical devices get attached, and the linkid is only known after the
1777 * daemon starts.
1778 */
1779 int
1780 dls_devnet_recreate(mac_handle_t mh, datalink_id_t linkid)
1781 {
1782 ASSERT(linkid != DATALINK_INVALID_LINKID);
1783 return (dls_devnet_set(mac_name(mh), linkid, GLOBAL_ZONEID, NULL));
1784 }
1785
1786 int
1787 dls_devnet_destroy(mac_handle_t mh, datalink_id_t *idp, boolean_t wait)
1788 {
1789 int err;
1790 mac_perim_handle_t mph;
1791
1792 *idp = DATALINK_INVALID_LINKID;
1793 err = dls_devnet_unset(mac_name(mh), idp, wait);
1794 if (err != 0 && err != ENOENT)
1795 return (err);
1796
1797 mac_perim_enter_by_mh(mh, &mph);
1798 err = dls_link_rele_by_name(mac_name(mh));
1799 mac_perim_exit(mph);
1800
1801 if (err != 0) {
1802 /*
1803 * XXX It is a general GLDv3 bug that dls_devnet_set() has to
1804 * be called to re-set the link when destroy fails. The
1805 * zoneid below will be incorrect if this function is ever
1806 * called from kernel context or from a zone other than that
1807 * which initially created the link.
1808 */
1809 (void) dls_devnet_set(mac_name(mh), *idp, crgetzoneid(CRED()),
1810 NULL);
1811 }
1812 return (err);
1813 }
1814
1815 /*
1816 * Implicitly create an IP tunnel link.
1817 */
1818 static int
1819 i_dls_devnet_create_iptun(const char *linkname, const char *drvname,
1820 datalink_id_t *linkid)
1821 {
1822 int err;
1823 iptun_kparams_t ik;
1824 uint32_t media;
1825 netstack_t *ns;
1826 major_t iptun_major;
1827 dev_info_t *iptun_dip;
1828
1829 /* First ensure that the iptun device is attached. */
1830 if ((iptun_major = ddi_name_to_major(IPTUN_DRIVER_NAME)) == (major_t)-1)
1831 return (EINVAL);
1832 if ((iptun_dip = ddi_hold_devi_by_instance(iptun_major, 0, 0)) == NULL)
1833 return (EINVAL);
1834
1835 if (IS_IPV4_TUN(drvname)) {
1836 ik.iptun_kparam_type = IPTUN_TYPE_IPV4;
1837 media = DL_IPV4;
1838 } else if (IS_6TO4_TUN(drvname)) {
1839 ik.iptun_kparam_type = IPTUN_TYPE_6TO4;
1840 media = DL_6TO4;
1841 } else if (IS_IPV6_TUN(drvname)) {
1842 ik.iptun_kparam_type = IPTUN_TYPE_IPV6;
1843 media = DL_IPV6;
1844 }
1845 ik.iptun_kparam_flags = (IPTUN_KPARAM_TYPE | IPTUN_KPARAM_IMPLICIT);
1846
1847 /* Obtain a datalink id for this tunnel. */
1848 err = dls_mgmt_create((char *)linkname, 0, DATALINK_CLASS_IPTUN, media,
1849 B_FALSE, &ik.iptun_kparam_linkid);
1850 if (err != 0) {
1851 ddi_release_devi(iptun_dip);
1852 return (err);
1853 }
1854
1855 ns = netstack_get_current();
1856 err = iptun_create(&ik, CRED());
1857 netstack_rele(ns);
1858
1859 if (err != 0)
1860 VERIFY(dls_mgmt_destroy(ik.iptun_kparam_linkid, B_FALSE) == 0);
1861 else
1862 *linkid = ik.iptun_kparam_linkid;
1863
1864 ddi_release_devi(iptun_dip);
1865 return (err);
1866 }
1867
1868 static int
1869 i_dls_devnet_destroy_iptun(datalink_id_t linkid)
1870 {
1871 int err;
1872
1873 /*
1874 * Note the use of zone_kcred() here as opposed to CRED(). This is
1875 * because the process that does the last close of this /dev/net node
1876 * may not have necessary privileges to delete this IP tunnel, but the
1877 * tunnel must always be implicitly deleted on last close.
1878 */
1879 if ((err = iptun_delete(linkid, zone_kcred())) == 0)
1880 (void) dls_mgmt_destroy(linkid, B_FALSE);
1881 return (err);
1882 }
1883
1884 const char *
1885 dls_devnet_mac(dls_dl_handle_t ddh)
1886 {
1887 return (ddh->dd_mac);
1888 }
1889
1890 datalink_id_t
1891 dls_devnet_linkid(dls_dl_handle_t ddh)
1892 {
1893 return (ddh->dd_linkid);
1894 }