Print this page
    
usr/src/cmd/dlmgmtd/dlmgmt_door.c
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/io/dls/dls_mgmt.c
          +++ new/usr/src/uts/common/io/dls/dls_mgmt.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
  24   24   * Copyright 2019 Joyent, Inc.
  25   25   */
  26   26  /*
  27   27   * Copyright (c) 2016 by Delphix. All rights reserved.
  28   28   */
  29   29  
  30   30  /*
  31   31   * Datalink management routines.
  32   32   */
  33   33  
  34   34  #include <sys/types.h>
  35   35  #include <sys/door.h>
  36   36  #include <sys/zone.h>
  37   37  #include <sys/modctl.h>
  38   38  #include <sys/file.h>
  39   39  #include <sys/modhash.h>
  40   40  #include <sys/kstat.h>
  41   41  #include <sys/vnode.h>
  42   42  #include <sys/cmn_err.h>
  43   43  #include <sys/softmac.h>
  44   44  #include <sys/dls.h>
  45   45  #include <sys/dls_impl.h>
  46   46  #include <sys/stropts.h>
  47   47  #include <sys/netstack.h>
  48   48  #include <inet/iptun/iptun_impl.h>
  49   49  
  50   50  /*
  51   51   * This vanity name management module is treated as part of the GLD framework
  52   52   * and we don't hold any GLD framework lock across a call to any mac
  53   53   * function that needs to acquire the mac perimeter. The hierarchy is
  54   54   * mac perimeter -> framework locks
  55   55   */
  56   56  
/*
 * Per-netstack DLS state.  One instance is allocated by dls_stack_init()
 * and released by dls_stack_fini().
 */
typedef struct dls_stack {
        zoneid_t        dlss_zoneid;    /* result of netstackid_to_zoneid() */
} dls_stack_t;
  60   60  
/*
 * File-scope state; everything here is set up in dls_mgmt_init() and torn
 * down in dls_mgmt_fini().
 */
static kmem_cache_t     *i_dls_devnet_cachep;   /* cache of dls_devnet_t */
static kmutex_t         i_dls_mgmt_lock;        /* guards dls_mgmt_dh */
static krwlock_t        i_dls_devnet_lock;      /* guards dls_devnet_t data */
static mod_hash_t       *i_dls_devnet_id_hash;  /* keyed by dd_linkid */
static mod_hash_t       *i_dls_devnet_hash;     /* keyed by dd_mac */

/* Cleared to B_FALSE by dls_mgmt_init(); not static, so visible elsewhere. */
boolean_t               devnet_need_rebuild;

/* Size argument used when creating both devnet hash tables. */
#define VLAN_HASHSZ     67      /* prime */
  70   70  
  71   71  /*
  72   72   * The following macros take a link name without the trailing PPA as input.
  73   73   * Opening a /dev/net node with one of these names causes a tunnel link to be
  74   74   * implicitly created in dls_devnet_hold_by_name() for backward compatibility
  75   75   * with Solaris 10 and prior.
  76   76   */
  77   77  #define IS_IPV4_TUN(name)       (strcmp((name), "ip.tun") == 0)
  78   78  #define IS_IPV6_TUN(name)       (strcmp((name), "ip6.tun") == 0)
  79   79  #define IS_6TO4_TUN(name)       (strcmp((name), "ip.6to4tun") == 0)
  80   80  #define IS_IPTUN_LINK(name)     (                                       \
  81   81      IS_IPV4_TUN(name) || IS_IPV6_TUN(name) || IS_6TO4_TUN(name))
  82   82  
/*
 * Upcall door handle.  Opened and released in dls_mgmt_door_set() and read in
 * i_dls_mgmt_upcall(), in both cases under i_dls_mgmt_lock.  NULL while no
 * door is open.
 */
static door_handle_t    dls_mgmt_dh = NULL;
  85   85  
  86   86  /* dls_devnet_t dd_flags */
  87   87  #define DD_CONDEMNED            0x1
  88   88  #define DD_IMPLICIT_IPTUN       0x2 /* Implicitly-created ip*.*tun* tunnel */
  89   89  #define DD_INITIALIZING         0x4
  90   90  
  91   91  /*
  92   92   * If the link is marked as initializing or condemned then it should
  93   93   * not be visible outside of the DLS framework.
  94   94   */
  95   95  #define DD_NOT_VISIBLE(flags)   (                                       \
  96   96          (flags & (DD_CONDEMNED | DD_INITIALIZING)) != 0)
  97   97  
/*
 * This structure is used to keep the <linkid, macname> mapping.
 * This structure itself is not protected by the mac perimeter, but is
 * protected by the dd_mutex and i_dls_devnet_lock. Thus most of the
 * functions manipulating this structure such as dls_devnet_set/unset etc.
 * may be called while not holding the mac perimeter.
 *
 * Instances come from i_dls_devnet_cachep; the cache constructor zeroes the
 * structure and initializes dd_mutex and dd_cv.
 */
typedef struct dls_devnet_s {
        datalink_id_t   dd_linkid;      /* key in i_dls_devnet_id_hash */
        char            dd_linkname[MAXLINKNAMELEN]; /* name used for kstats */
        char            dd_mac[MAXNAMELEN]; /* key in i_dls_devnet_hash */
        kstat_t         *dd_ksp;        /* kstat in owner_zid */
        kstat_t         *dd_zone_ksp;   /* in dd_zid if != owner_zid */
        uint32_t        dd_ref;         /* must be 0 when destructed */
        kmutex_t        dd_mutex;
        kcondvar_t      dd_cv;          /* tref drop / prop task completion */
        uint32_t        dd_tref;        /* dropped by dls_devnet_rele_tmp() */
        uint_t          dd_flags;       /* DD_* flags */
        zoneid_t        dd_owner_zid;   /* zone where node was created */
        zoneid_t        dd_zid;         /* current zone */
        boolean_t       dd_prop_loaded; /* set by dls_devnet_prop_task() */
        taskqid_t       dd_prop_taskid; /* nonzero while prop task pending */
        boolean_t       dd_transient;   /* link goes away when zone does */
} dls_devnet_t;
 122  122  
/*
 * Forward declarations of file-local (static) helpers that are referenced
 * before their definitions.
 */
static int i_dls_devnet_create_iptun(const char *, const char *,
    datalink_id_t *);
static int i_dls_devnet_destroy_iptun(datalink_id_t);
static int i_dls_devnet_setzid(dls_devnet_t *, zoneid_t, boolean_t, boolean_t);
static int dls_devnet_unset(mac_handle_t, datalink_id_t *, boolean_t);
 128  128  
 129  129  /*ARGSUSED*/
 130  130  static int
 131  131  i_dls_devnet_constructor(void *buf, void *arg, int kmflag)
 132  132  {
 133  133          dls_devnet_t    *ddp = buf;
 134  134  
 135  135          bzero(buf, sizeof (dls_devnet_t));
 136  136          mutex_init(&ddp->dd_mutex, NULL, MUTEX_DEFAULT, NULL);
 137  137          cv_init(&ddp->dd_cv, NULL, CV_DEFAULT, NULL);
 138  138          return (0);
 139  139  }
 140  140  
 141  141  /*ARGSUSED*/
 142  142  static void
 143  143  i_dls_devnet_destructor(void *buf, void *arg)
 144  144  {
 145  145          dls_devnet_t    *ddp = buf;
 146  146  
 147  147          VERIFY(ddp->dd_ksp == NULL);
 148  148          VERIFY(ddp->dd_ref == 0);
 149  149          VERIFY(ddp->dd_tref == 0);
 150  150          mutex_destroy(&ddp->dd_mutex);
 151  151          cv_destroy(&ddp->dd_cv);
 152  152  }
 153  153  
 154  154  /* ARGSUSED */
 155  155  static int
 156  156  dls_zone_remove(datalink_id_t linkid, void *arg)
 157  157  {
 158  158          dls_devnet_t *ddp;
 159  159  
 160  160          if (dls_devnet_hold_tmp(linkid, &ddp) == 0) {
 161  161                  /*
 162  162                   * Don't bother moving transient links back to the global zone
 163  163                   * since we will simply delete them in dls_devnet_unset.
 164  164                   */
 165  165                  if (!ddp->dd_transient)
 166  166                          (void) dls_devnet_setzid(ddp, GLOBAL_ZONEID, B_FALSE);
 167  167                  dls_devnet_rele_tmp(ddp);
 168  168          }
 169  169          return (0);
 170  170  }
 171  171  
 172  172  /* ARGSUSED */
 173  173  static void *
 174  174  dls_stack_init(netstackid_t stackid, netstack_t *ns)
 175  175  {
 176  176          dls_stack_t *dlss;
 177  177  
 178  178          dlss = kmem_zalloc(sizeof (*dlss), KM_SLEEP);
 179  179          dlss->dlss_zoneid = netstackid_to_zoneid(stackid);
 180  180          return (dlss);
 181  181  }
 182  182  
 183  183  /* ARGSUSED */
 184  184  static void
 185  185  dls_stack_shutdown(netstackid_t stackid, void *arg)
 186  186  {
 187  187          dls_stack_t     *dlss = (dls_stack_t *)arg;
 188  188  
 189  189          /* Move remaining datalinks in this zone back to the global zone. */
 190  190          (void) zone_datalink_walk(dlss->dlss_zoneid, dls_zone_remove, NULL);
 191  191  }
 192  192  
 193  193  /* ARGSUSED */
 194  194  static void
 195  195  dls_stack_fini(netstackid_t stackid, void *arg)
 196  196  {
 197  197          dls_stack_t     *dlss = (dls_stack_t *)arg;
 198  198  
 199  199          kmem_free(dlss, sizeof (*dlss));
 200  200  }
 201  201  
 202  202  /*
 203  203   * Module initialization and finalization functions.
 204  204   */
 205  205  void
 206  206  dls_mgmt_init(void)
 207  207  {
 208  208          mutex_init(&i_dls_mgmt_lock, NULL, MUTEX_DEFAULT, NULL);
 209  209          rw_init(&i_dls_devnet_lock, NULL, RW_DEFAULT, NULL);
 210  210  
 211  211          /*
 212  212           * Create a kmem_cache of dls_devnet_t structures.
 213  213           */
 214  214          i_dls_devnet_cachep = kmem_cache_create("dls_devnet_cache",
 215  215              sizeof (dls_devnet_t), 0, i_dls_devnet_constructor,
 216  216              i_dls_devnet_destructor, NULL, NULL, NULL, 0);
 217  217          ASSERT(i_dls_devnet_cachep != NULL);
 218  218  
 219  219          /*
 220  220           * Create a hash table, keyed by dd_linkid, of dls_devnet_t.
 221  221           */
 222  222          i_dls_devnet_id_hash = mod_hash_create_idhash("dls_devnet_id_hash",
 223  223              VLAN_HASHSZ, mod_hash_null_valdtor);
 224  224  
 225  225          /*
 226  226           * Create a hash table, keyed by dd_mac
 227  227           */
 228  228          i_dls_devnet_hash = mod_hash_create_extended("dls_devnet_hash",
 229  229              VLAN_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor,
 230  230              mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
 231  231  
 232  232          devnet_need_rebuild = B_FALSE;
 233  233  
 234  234          netstack_register(NS_DLS, dls_stack_init, dls_stack_shutdown,
 235  235              dls_stack_fini);
 236  236  }
 237  237  
 238  238  void
 239  239  dls_mgmt_fini(void)
 240  240  {
 241  241          netstack_unregister(NS_DLS);
 242  242          mod_hash_destroy_hash(i_dls_devnet_hash);
 243  243          mod_hash_destroy_hash(i_dls_devnet_id_hash);
 244  244          kmem_cache_destroy(i_dls_devnet_cachep);
 245  245          rw_destroy(&i_dls_devnet_lock);
 246  246          mutex_destroy(&i_dls_mgmt_lock);
 247  247  }
 248  248  
 249  249  int
 250  250  dls_mgmt_door_set(boolean_t start)
 251  251  {
 252  252          int     err;
 253  253  
 254  254          /* handle daemon restart */
 255  255          mutex_enter(&i_dls_mgmt_lock);
 256  256          if (dls_mgmt_dh != NULL) {
 257  257                  door_ki_rele(dls_mgmt_dh);
 258  258                  dls_mgmt_dh = NULL;
 259  259          }
 260  260  
 261  261          if (start && ((err = door_ki_open(DLMGMT_DOOR, &dls_mgmt_dh)) != 0)) {
 262  262                  mutex_exit(&i_dls_mgmt_lock);
 263  263                  return (err);
 264  264          }
 265  265  
 266  266          mutex_exit(&i_dls_mgmt_lock);
 267  267  
 268  268          /*
 269  269           * Create and associate <link name, linkid> mapping for network devices
 270  270           * which are already attached before the daemon is started.
 271  271           */
 272  272          if (start)
 273  273                  softmac_recreate();
 274  274          return (0);
 275  275  }
 276  276  
 277  277  static boolean_t
 278  278  i_dls_mgmt_door_revoked(door_handle_t dh)
 279  279  {
 280  280          struct door_info info;
 281  281          extern int sys_shutdown;
 282  282  
 283  283          ASSERT(dh != NULL);
 284  284  
 285  285          if (sys_shutdown) {
 286  286                  cmn_err(CE_NOTE, "dls_mgmt_door: shutdown observed\n");
 287  287                  return (B_TRUE);
 288  288          }
 289  289  
 290  290          if (door_ki_info(dh, &info) != 0)
 291  291                  return (B_TRUE);
 292  292  
 293  293          return ((info.di_attributes & DOOR_REVOKED) != 0);
 294  294  }
 295  295  
 296  296  /*
 297  297   * Upcall to the datalink management daemon (dlmgmtd).
 298  298   */
 299  299  static int
 300  300  i_dls_mgmt_upcall(void *arg, size_t asize, void *rbuf, size_t rsize)
 301  301  {
 302  302          door_arg_t                      darg, save_arg;
 303  303          door_handle_t                   dh;
 304  304          int                             err;
 305  305          int                             retry = 0;
 306  306  
 307  307  #define MAXRETRYNUM     3
 308  308  
 309  309          ASSERT(arg);
 310  310          darg.data_ptr = arg;
 311  311          darg.data_size = asize;
 312  312          darg.desc_ptr = NULL;
 313  313          darg.desc_num = 0;
 314  314          darg.rbuf = rbuf;
 315  315          darg.rsize = rsize;
 316  316          save_arg = darg;
 317  317  
 318  318  retry:
 319  319          mutex_enter(&i_dls_mgmt_lock);
 320  320          dh = dls_mgmt_dh;
 321  321          if ((dh == NULL) || i_dls_mgmt_door_revoked(dh)) {
 322  322                  mutex_exit(&i_dls_mgmt_lock);
 323  323                  return (EBADF);
 324  324          }
 325  325          door_ki_hold(dh);
 326  326          mutex_exit(&i_dls_mgmt_lock);
 327  327  
 328  328          for (;;) {
 329  329                  retry++;
 330  330                  if ((err = door_ki_upcall_limited(dh, &darg, zone_kcred(),
 331  331                      SIZE_MAX, 0)) == 0)
 332  332                          break;
 333  333  
 334  334                  /*
 335  335                   * handle door call errors
 336  336                   */
 337  337                  darg = save_arg;
 338  338                  switch (err) {
 339  339                  case EINTR:
 340  340                          /*
 341  341                           * If the operation which caused this door upcall gets
 342  342                           * interrupted, return directly.
 343  343                           */
 344  344                          goto done;
 345  345                  case EAGAIN:
 346  346                          /*
 347  347                           * Repeat upcall if the maximum attempt limit has not
 348  348                           * been reached.
 349  349                           */
 350  350                          if (retry < MAXRETRYNUM) {
 351  351                                  delay(2 * hz);
 352  352                                  break;
 353  353                          }
 354  354                          cmn_err(CE_WARN, "dls: dlmgmtd fatal error %d\n", err);
 355  355                          goto done;
 356  356                  default:
 357  357                          /* A fatal door error */
 358  358                          if (i_dls_mgmt_door_revoked(dh)) {
 359  359                                  cmn_err(CE_NOTE,
 360  360                                      "dls: dlmgmtd door service revoked\n");
 361  361  
 362  362                                  if (retry < MAXRETRYNUM) {
 363  363                                          door_ki_rele(dh);
 364  364                                          goto retry;
 365  365                                  }
 366  366                          }
 367  367                          cmn_err(CE_WARN, "dls: dlmgmtd fatal error %d\n", err);
 368  368                          goto done;
 369  369                  }
 370  370          }
 371  371  
 372  372          if (darg.rbuf != rbuf) {
 373  373                  /*
 374  374                   * The size of the input rbuf was not big enough, so the
 375  375                   * upcall allocated the rbuf itself.  If this happens, assume
 376  376                   * that this was an invalid door call request.
 377  377                   */
 378  378                  kmem_free(darg.rbuf, darg.rsize);
 379  379                  err = ENOSPC;
 380  380                  goto done;
 381  381          }
 382  382  
 383  383          if (darg.rsize != rsize) {
 384  384                  err = EINVAL;
 385  385                  goto done;
 386  386          }
 387  387  
 388  388          err = ((dlmgmt_retval_t *)rbuf)->lr_err;
 389  389  
 390  390  done:
 391  391          door_ki_rele(dh);
 392  392          return (err);
 393  393  }
 394  394  
 395  395  /*
 396  396   * Request the datalink management daemon to create a link with the attributes
 397  397   * below.  Upon success, zero is returned and linkidp contains the linkid for
 398  398   * the new link; otherwise, an errno is returned.
 399  399   *
 400  400   *     - dev            physical dev_t.  required for all physical links,
 401  401   *                      including GLDv3 links.  It will be used to force the
 402  402   *                      attachment of a physical device, hence the
 403  403   *                      registration of its mac
 404  404   *     - class          datalink class
 405  405   *     - media type     media type; DL_OTHER means unknown
 406  406   *     - persist        whether to persist the datalink
 407  407   */
 408  408  int
 409  409  dls_mgmt_create(const char *devname, dev_t dev, datalink_class_t class,
 410  410      uint32_t media, boolean_t persist, datalink_id_t *linkidp)
 411  411  {
 412  412          dlmgmt_upcall_arg_create_t      create;
 413  413          dlmgmt_create_retval_t          retval;
 414  414          int                             err;
 415  415  
 416  416          create.ld_cmd = DLMGMT_CMD_DLS_CREATE;
 417  417          create.ld_class = class;
 418  418          create.ld_media = media;
 419  419          create.ld_phymaj = getmajor(dev);
 420  420          create.ld_phyinst = getminor(dev);
 421  421          create.ld_persist = persist;
 422  422          if (strlcpy(create.ld_devname, devname, sizeof (create.ld_devname)) >=
 423  423              sizeof (create.ld_devname))
 424  424                  return (EINVAL);
 425  425  
 426  426          if ((err = i_dls_mgmt_upcall(&create, sizeof (create), &retval,
 427  427              sizeof (retval))) == 0) {
 428  428                  *linkidp = retval.lr_linkid;
 429  429          }
 430  430          return (err);
 431  431  }
 432  432  
 433  433  /*
 434  434   * Request the datalink management daemon to destroy the specified link.
 435  435   * Returns zero upon success, or an errno upon failure.
 436  436   */
 437  437  int
 438  438  dls_mgmt_destroy(datalink_id_t linkid, boolean_t persist)
 439  439  {
 440  440          dlmgmt_upcall_arg_destroy_t     destroy;
 441  441          dlmgmt_destroy_retval_t         retval;
 442  442  
 443  443          destroy.ld_cmd = DLMGMT_CMD_DLS_DESTROY;
 444  444          destroy.ld_linkid = linkid;
 445  445          destroy.ld_persist = persist;
 446  446  
 447  447          return (i_dls_mgmt_upcall(&destroy, sizeof (destroy),
 448  448              &retval, sizeof (retval)));
 449  449  }
 450  450  
 451  451  /*
 452  452   * Request the datalink management daemon to verify/update the information
 453  453   * for a physical link.  Upon success, get its linkid.
 454  454   *
 455  455   *     - media type     media type
 456  456   *     - novanity       whether this physical datalink supports vanity naming.
 457  457   *                      physical links that do not use the GLDv3 MAC plugin
 458  458   *                      cannot suport vanity naming
 459  459   *
 460  460   * This function could fail with ENOENT or EEXIST.  Two cases return EEXIST:
 461  461   *
 462  462   * 1. A link with devname already exists, but the media type does not match.
 463  463   *    In this case, mediap will bee set to the media type of the existing link.
 464  464   * 2. A link with devname already exists, but its link name does not match
 465  465   *    the device name, although this link does not support vanity naming.
 466  466   */
 467  467  int
 468  468  dls_mgmt_update(const char *devname, uint32_t media, boolean_t novanity,
 469  469      uint32_t *mediap, datalink_id_t *linkidp)
 470  470  {
 471  471          dlmgmt_upcall_arg_update_t      update;
 472  472          dlmgmt_update_retval_t          retval;
 473  473          int                             err;
 474  474  
 475  475          update.ld_cmd = DLMGMT_CMD_DLS_UPDATE;
 476  476  
 477  477          if (strlcpy(update.ld_devname, devname, sizeof (update.ld_devname)) >=
 478  478              sizeof (update.ld_devname))
 479  479                  return (EINVAL);
 480  480  
 481  481          update.ld_media = media;
 482  482          update.ld_novanity = novanity;
 483  483  
 484  484          if ((err = i_dls_mgmt_upcall(&update, sizeof (update), &retval,
 485  485              sizeof (retval))) == EEXIST) {
 486  486                  *linkidp = retval.lr_linkid;
 487  487                  *mediap = retval.lr_media;
 488  488          } else if (err == 0) {
 489  489                  *linkidp = retval.lr_linkid;
 490  490          }
 491  491  
 492  492          return (err);
 493  493  }
 494  494  
 495  495  /*
 496  496   * Request the datalink management daemon to get the information for a link.
 497  497   * Returns zero upon success, or an errno upon failure.
 498  498   *
 499  499   * Only fills in information for argument pointers that are non-NULL.
 500  500   * Note that the link argument is expected to be MAXLINKNAMELEN bytes.
 501  501   */
 502  502  int
 503  503  dls_mgmt_get_linkinfo(datalink_id_t linkid, char *link,
 504  504      datalink_class_t *classp, uint32_t *mediap, uint32_t *flagsp)
 505  505  {
 506  506          dlmgmt_door_getname_t   getname;
 507  507          dlmgmt_getname_retval_t retval;
 508  508          int                     err, len;
 509  509  
 510  510          getname.ld_cmd = DLMGMT_CMD_GETNAME;
 511  511          getname.ld_linkid = linkid;
 512  512  
 513  513          if ((err = i_dls_mgmt_upcall(&getname, sizeof (getname), &retval,
 514  514              sizeof (retval))) != 0) {
 515  515                  return (err);
 516  516          }
 517  517  
 518  518          len = strlen(retval.lr_link);
 519  519          if (len <= 1 || len >= MAXLINKNAMELEN)
 520  520                  return (EINVAL);
 521  521  
 522  522          if (link != NULL)
 523  523                  (void) strlcpy(link, retval.lr_link, MAXLINKNAMELEN);
 524  524          if (classp != NULL)
 525  525                  *classp = retval.lr_class;
 526  526          if (mediap != NULL)
 527  527                  *mediap = retval.lr_media;
 528  528          if (flagsp != NULL)
 529  529                  *flagsp = retval.lr_flags;
 530  530          return (0);
 531  531  }
 532  532  
 533  533  /*
 534  534   * Request the datalink management daemon to get the linkid for a link.
 535  535   * Returns a non-zero error code on failure.  The linkid argument is only
 536  536   * set on success (when zero is returned.)
 537  537   */
 538  538  int
 539  539  dls_mgmt_get_linkid(const char *link, datalink_id_t *linkid)
 540  540  {
 541  541          dlmgmt_door_getlinkid_t         getlinkid;
 542  542          dlmgmt_getlinkid_retval_t       retval;
 543  543          int                             err;
 544  544  
 545  545          getlinkid.ld_cmd = DLMGMT_CMD_GETLINKID;
 546  546          (void) strlcpy(getlinkid.ld_link, link, MAXLINKNAMELEN);
 547  547          getlinkid.ld_zoneid = getzoneid();
 548  548  
 549  549          if ((err = i_dls_mgmt_upcall(&getlinkid, sizeof (getlinkid), &retval,
 550  550              sizeof (retval))) == 0) {
 551  551                  *linkid = retval.lr_linkid;
 552  552          }
 553  553          return (err);
 554  554  }
 555  555  
 556  556  int
 557  557  dls_mgmt_get_linkid_in_zone(const char *link, datalink_id_t *linkid,
 558  558      zoneid_t zid)
 559  559  {
 560  560          dlmgmt_door_getlinkid_t         getlinkid;
 561  561          dlmgmt_getlinkid_retval_t       retval;
 562  562          int                             err;
 563  563  
 564  564          ASSERT(getzoneid() == GLOBAL_ZONEID || zid == getzoneid());
 565  565          getlinkid.ld_cmd = DLMGMT_CMD_GETLINKID;
 566  566          (void) strlcpy(getlinkid.ld_link, link, MAXLINKNAMELEN);
 567  567          getlinkid.ld_zoneid = zid;
 568  568  
 569  569          if ((err = i_dls_mgmt_upcall(&getlinkid, sizeof (getlinkid), &retval,
 570  570              sizeof (retval))) == 0) {
 571  571                  *linkid = retval.lr_linkid;
 572  572          }
 573  573          return (err);
 574  574  }
 575  575  
 576  576  
 577  577  datalink_id_t
 578  578  dls_mgmt_get_next(datalink_id_t linkid, datalink_class_t class,
 579  579      datalink_media_t dmedia, uint32_t flags)
 580  580  {
 581  581          dlmgmt_door_getnext_t   getnext;
 582  582          dlmgmt_getnext_retval_t retval;
 583  583  
 584  584          getnext.ld_cmd = DLMGMT_CMD_GETNEXT;
 585  585          getnext.ld_class = class;
 586  586          getnext.ld_dmedia = dmedia;
 587  587          getnext.ld_flags = flags;
 588  588          getnext.ld_linkid = linkid;
 589  589  
 590  590          if (i_dls_mgmt_upcall(&getnext, sizeof (getnext), &retval,
 591  591              sizeof (retval)) != 0) {
 592  592                  return (DATALINK_INVALID_LINKID);
 593  593          }
 594  594  
 595  595          return (retval.lr_linkid);
 596  596  }
 597  597  
 598  598  static int
 599  599  i_dls_mgmt_get_linkattr(const datalink_id_t linkid, const char *attr,
 600  600      void *attrval, size_t *attrszp)
 601  601  {
 602  602          dlmgmt_upcall_arg_getattr_t     getattr;
 603  603          dlmgmt_getattr_retval_t         retval;
 604  604          int                             err;
 605  605  
 606  606          getattr.ld_cmd = DLMGMT_CMD_DLS_GETATTR;
 607  607          getattr.ld_linkid = linkid;
 608  608          (void) strlcpy(getattr.ld_attr, attr, MAXLINKATTRLEN);
 609  609  
 610  610          if ((err = i_dls_mgmt_upcall(&getattr, sizeof (getattr), &retval,
 611  611              sizeof (retval))) == 0) {
 612  612                  if (*attrszp < retval.lr_attrsz)
 613  613                          return (EINVAL);
 614  614                  *attrszp = retval.lr_attrsz;
 615  615                  bcopy(retval.lr_attrval, attrval, retval.lr_attrsz);
 616  616          }
 617  617  
 618  618          return (err);
 619  619  }
 620  620  
 621  621  /*
 622  622   * Note that this function can only get devp successfully for non-VLAN link.
 623  623   */
 624  624  int
 625  625  dls_mgmt_get_phydev(datalink_id_t linkid, dev_t *devp)
 626  626  {
 627  627          uint64_t        maj, inst;
 628  628          size_t          attrsz = sizeof (uint64_t);
 629  629  
 630  630          if (i_dls_mgmt_get_linkattr(linkid, FPHYMAJ, &maj, &attrsz) != 0 ||
 631  631              attrsz != sizeof (uint64_t) ||
 632  632              i_dls_mgmt_get_linkattr(linkid, FPHYINST, &inst, &attrsz) != 0 ||
 633  633              attrsz != sizeof (uint64_t)) {
 634  634                  return (EINVAL);
 635  635          }
 636  636  
 637  637          *devp = makedevice((major_t)maj, (minor_t)inst);
 638  638          return (0);
 639  639  }
 640  640  
 641  641  /*
 642  642   * Request the datalink management daemon to push in
 643  643   * all properties associated with the link.
 644  644   * Returns a non-zero error code on failure.
 645  645   */
 646  646  int
 647  647  dls_mgmt_linkprop_init(datalink_id_t linkid)
 648  648  {
 649  649          dlmgmt_door_linkprop_init_t     li;
 650  650          dlmgmt_linkprop_init_retval_t   retval;
 651  651          int                             err;
 652  652  
 653  653          li.ld_cmd = DLMGMT_CMD_LINKPROP_INIT;
 654  654          li.ld_linkid = linkid;
 655  655  
 656  656          err = i_dls_mgmt_upcall(&li, sizeof (li), &retval, sizeof (retval));
 657  657          return (err);
 658  658  }
 659  659  
 660  660  static void
 661  661  dls_devnet_prop_task(void *arg)
 662  662  {
 663  663          dls_devnet_t            *ddp = arg;
 664  664  
 665  665          (void) dls_mgmt_linkprop_init(ddp->dd_linkid);
 666  666  
 667  667          mutex_enter(&ddp->dd_mutex);
 668  668          ddp->dd_prop_loaded = B_TRUE;
 669  669          ddp->dd_prop_taskid = 0;
 670  670          cv_broadcast(&ddp->dd_cv);
 671  671          mutex_exit(&ddp->dd_mutex);
 672  672  }
 673  673  
 674  674  /*
 675  675   * Ensure property loading task is completed.
 676  676   */
 677  677  void
 678  678  dls_devnet_prop_task_wait(dls_dl_handle_t ddp)
 679  679  {
 680  680          mutex_enter(&ddp->dd_mutex);
 681  681          while (ddp->dd_prop_taskid != 0)
 682  682                  cv_wait(&ddp->dd_cv, &ddp->dd_mutex);
 683  683          mutex_exit(&ddp->dd_mutex);
 684  684  }
 685  685  
 686  686  void
 687  687  dls_devnet_rele_tmp(dls_dl_handle_t dlh)
 688  688  {
 689  689          dls_devnet_t            *ddp = dlh;
 690  690  
 691  691          mutex_enter(&ddp->dd_mutex);
 692  692          ASSERT(ddp->dd_tref != 0);
 693  693          if (--ddp->dd_tref == 0)
 694  694                  cv_signal(&ddp->dd_cv);
 695  695          mutex_exit(&ddp->dd_mutex);
 696  696  }
 697  697  
 698  698  int
 699  699  dls_devnet_hold_link(datalink_id_t linkid, dls_dl_handle_t *ddhp,
 700  700      dls_link_t **dlpp)
 701  701  {
 702  702          dls_dl_handle_t dlh;
 703  703          dls_link_t      *dlp;
 704  704          int             err;
 705  705  
 706  706          if ((err = dls_devnet_hold_tmp(linkid, &dlh)) != 0)
 707  707                  return (err);
 708  708  
 709  709          if ((err = dls_link_hold(dls_devnet_mac(dlh), &dlp)) != 0) {
 710  710                  dls_devnet_rele_tmp(dlh);
 711  711                  return (err);
 712  712          }
 713  713  
 714  714          ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
 715  715  
 716  716          *ddhp = dlh;
 717  717          *dlpp = dlp;
 718  718          return (0);
 719  719  }
 720  720  
 721  721  void
 722  722  dls_devnet_rele_link(dls_dl_handle_t dlh, dls_link_t *dlp)
 723  723  {
 724  724          ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
 725  725  
 726  726          dls_link_rele(dlp);
 727  727          dls_devnet_rele_tmp(dlh);
 728  728  }
 729  729  
 730  730  /*
 731  731   * "link" kstats related functions.
 732  732   */
 733  733  
 734  734  /*
 735  735   * Query the "link" kstats.
 736  736   *
 737  737   * We may be called from the kstat subsystem in an arbitrary context.
 738  738   * If the caller is the stack, the context could be an upcall data
 739  739   * thread. Hence we can't acquire the mac perimeter in this function
 740  740   * for fear of deadlock.
 741  741   */
 742  742  static int
 743  743  dls_devnet_stat_update(kstat_t *ksp, int rw)
 744  744  {
 745  745          datalink_id_t   linkid = (datalink_id_t)(uintptr_t)ksp->ks_private;
 746  746          dls_devnet_t    *ddp;
 747  747          dls_link_t      *dlp;
 748  748          int             err;
 749  749  
 750  750          if ((err = dls_devnet_hold_tmp(linkid, &ddp)) != 0) {
 751  751                  return (err);
 752  752          }
 753  753  
 754  754          /*
 755  755           * If a device detach happens at this time, it will block in
 756  756           * dls_devnet_unset since the dd_tref has been bumped in
 757  757           * dls_devnet_hold_tmp(). So the access to 'dlp' is safe even though
 758  758           * we don't hold the mac perimeter.
 759  759           */
 760  760          if (mod_hash_find(i_dls_link_hash, (mod_hash_key_t)ddp->dd_mac,
 761  761              (mod_hash_val_t *)&dlp) != 0) {
 762  762                  dls_devnet_rele_tmp(ddp);
 763  763                  return (ENOENT);
 764  764          }
 765  765  
 766  766          err = dls_stat_update(ksp, dlp, rw);
 767  767  
 768  768          dls_devnet_rele_tmp(ddp);
 769  769          return (err);
 770  770  }
 771  771  
 772  772  /*
 773  773   * Create the "link" kstats.
 774  774   */
 775  775  static void
 776  776  dls_devnet_stat_create(dls_devnet_t *ddp, zoneid_t zoneid, zoneid_t newzoneid)
 777  777  {
 778  778          kstat_t *ksp;
 779  779          char    *nm;
 780  780          char    kname[MAXLINKNAMELEN];
 781  781  
 782  782          if (zoneid != newzoneid) {
 783  783                  ASSERT(zoneid == GLOBAL_ZONEID);
 784  784                  (void) snprintf(kname, sizeof (kname), "z%d_%s", newzoneid,
 785  785                      ddp->dd_linkname);
 786  786                  nm = kname;
 787  787          } else {
 788  788                  nm = ddp->dd_linkname;
 789  789          }
 790  790  
 791  791          if (dls_stat_create("link", 0, nm, zoneid,
 792  792              dls_devnet_stat_update, (void *)(uintptr_t)ddp->dd_linkid,
 793  793              &ksp, newzoneid) == 0) {
 794  794                  ASSERT(ksp != NULL);
 795  795                  if (zoneid == ddp->dd_owner_zid) {
 796  796                          ASSERT(ddp->dd_ksp == NULL);
 797  797                          ddp->dd_ksp = ksp;
 798  798                  } else {
 799  799                          ASSERT(ddp->dd_zone_ksp == NULL);
 800  800                          ddp->dd_zone_ksp = ksp;
 801  801                  }
 802  802          }
 803  803  }
 804  804  
 805  805  /*
 806  806   * Destroy the "link" kstats.
 807  807   */
 808  808  static void
 809  809  dls_devnet_stat_destroy(dls_devnet_t *ddp, zoneid_t zoneid)
 810  810  {
 811  811          if (zoneid == ddp->dd_owner_zid) {
 812  812                  if (ddp->dd_ksp != NULL) {
 813  813                          dls_stat_delete(ddp->dd_ksp);
 814  814                          ddp->dd_ksp = NULL;
 815  815                  }
 816  816          } else {
 817  817                  if (ddp->dd_zone_ksp != NULL) {
 818  818                          dls_stat_delete(ddp->dd_zone_ksp);
 819  819                          ddp->dd_zone_ksp = NULL;
 820  820                  }
 821  821          }
 822  822  }
 823  823  
 824  824  /*
 825  825   * The link has been renamed. Destroy the old non-legacy kstats ("link kstats")
 826  826   * and create the new set using the new name.
 827  827   */
 828  828  static void
 829  829  dls_devnet_stat_rename(dls_devnet_t *ddp, boolean_t zoneinit)
 830  830  {
 831  831          if (ddp->dd_ksp != NULL) {
 832  832                  dls_stat_delete(ddp->dd_ksp);
 833  833                  ddp->dd_ksp = NULL;
 834  834          }
 835  835          if (zoneinit && ddp->dd_zone_ksp != NULL) {
 836  836                  dls_stat_delete(ddp->dd_zone_ksp);
 837  837                  ddp->dd_zone_ksp = NULL;
 838  838          }
 839  839          /*
 840  840           * We can't rename a link while it's assigned to a non-global zone
 841  841           * unless we're first initializing the zone while readying it.
 842  842           */
 843  843          ASSERT(ddp->dd_zone_ksp == NULL);
 844  844          dls_devnet_stat_create(ddp, ddp->dd_owner_zid,
 845  845              (zoneinit ? ddp->dd_zid : ddp->dd_owner_zid));
 846  846          if (zoneinit)
 847  847                  dls_devnet_stat_create(ddp, ddp->dd_zid, ddp->dd_zid);
 848  848  }
 849  849  
 850  850  /*
 851  851   * Associate the linkid with the link identified by macname. If this
 852  852   * is called on behalf of a physical link then linkid may be
 853  853   * DATALINK_INVALID_LINKID. Otherwise, if called on behalf of a
 854  854   * virtual link, linkid must have a value.
 855  855   */
 856  856  static int
 857  857  dls_devnet_set(mac_handle_t mh, datalink_id_t linkid, zoneid_t zoneid,
 858  858      dls_devnet_t **ddpp)
 859  859  {
 860  860          const char              *macname = mac_name(mh);
 861  861          dls_devnet_t            *ddp = NULL;
 862  862          datalink_class_t        class;
 863  863          int                     err;
 864  864          boolean_t               stat_create = B_FALSE;
 865  865          char                    linkname[MAXLINKNAMELEN];
 866  866  
 867  867          rw_enter(&i_dls_devnet_lock, RW_WRITER);
 868  868  
 869  869          /*
 870  870           * Don't allow callers to set a link name with a linkid that already
 871  871           * has a name association (that's what rename is for).
 872  872           */
 873  873          if (linkid != DATALINK_INVALID_LINKID) {
 874  874                  if (mod_hash_find(i_dls_devnet_id_hash,
 875  875                      (mod_hash_key_t)(uintptr_t)linkid,
 876  876                      (mod_hash_val_t *)&ddp) == 0) {
 877  877                          err = EEXIST;
 878  878                          goto done;
 879  879                  }
 880  880                  if ((err = dls_mgmt_get_linkinfo(linkid, linkname, &class,
 881  881                      NULL, NULL)) != 0)
 882  882                          goto done;
 883  883          }
 884  884  
 885  885          if ((err = mod_hash_find(i_dls_devnet_hash,
 886  886              (mod_hash_key_t)macname, (mod_hash_val_t *)&ddp)) == 0) {
 887  887                  if (ddp->dd_linkid != DATALINK_INVALID_LINKID) {
 888  888                          err = EEXIST;
 889  889                          goto done;
 890  890                  }
 891  891  
 892  892                  /*
 893  893                   * If we arrive here we know we are attempting to set
 894  894                   * the linkid on a physical link. A virtual link
 895  895                   * should never arrive here because it should never
 896  896                   * call this function without a linkid. Virtual links
 897  897                   * are created through dlmgmtd and thus we know
 898  898                   * dlmgmtd is alive to assign it a linkid (search for
 899  899                   * uses of dladm_create_datalink_id() to prove this to
 900  900                   * yourself); we don't have the same guarantee for a
 901  901                   * physical link which may perform an upcall for a
 902  902                   * linkid while dlmgmtd is down but will continue
 903  903                   * creating a devnet without the linkid (see
 904  904                   * softmac_create_datalink() to see how physical link
 905  905                   * creation works). That is why there is no entry in
 906  906                   * the id hash but there is one in the macname hash --
 907  907                   * softmac couldn't acquire a linkid the first time it
 908  908                   * called this function.
 909  909                   *
 910  910                   * Because of the check above, we also know that
 911  911                   * ddp->dd_linkid is not set. Following this, the link
 912  912                   * must still be in the DD_INITIALIZING state because
 913  913                   * that flag is removed IFF dd_linkid is set. This is
 914  914                   * why we can ASSERT the DD_INITIALIZING flag below if
 915  915                   * the call to i_dls_devnet_setzid() fails.
 916  916                   */
 917  917                  if (linkid == DATALINK_INVALID_LINKID ||
 918  918                      class != DATALINK_CLASS_PHYS) {
 919  919                          err = EINVAL;
 920  920                          goto done;
  
    | 
      ↓ open down ↓ | 
    920 lines elided | 
    
      ↑ open up ↑ | 
  
 921  921                  }
 922  922  
 923  923                  ASSERT(ddp->dd_flags & DD_INITIALIZING);
 924  924  
 925  925          } else {
 926  926                  ddp = kmem_cache_alloc(i_dls_devnet_cachep, KM_SLEEP);
 927  927                  ddp->dd_flags = DD_INITIALIZING;
 928  928                  ddp->dd_tref = 0;
 929  929                  ddp->dd_ref++;
 930  930                  ddp->dd_owner_zid = zoneid;
      931 +                /*
      932 +                 * If we are creating a new devnet which will be owned by a NGZ
      933 +                 * then mark it as transient. This link has never been in the
      934 +                 * GZ, the GZ will not have a hold on its reference, and we do
      935 +                 * not want to return it to the GZ when the zone halts.
      936 +                 */
      937 +                if (zoneid != GLOBAL_ZONEID)
      938 +                        ddp->dd_transient = B_TRUE;
 931  939                  (void) strlcpy(ddp->dd_mac, macname, sizeof (ddp->dd_mac));
 932  940                  VERIFY(mod_hash_insert(i_dls_devnet_hash,
 933  941                      (mod_hash_key_t)ddp->dd_mac, (mod_hash_val_t)ddp) == 0);
 934  942          }
 935  943  
 936  944          if (linkid != DATALINK_INVALID_LINKID) {
 937  945                  ddp->dd_linkid = linkid;
 938  946                  (void) strlcpy(ddp->dd_linkname, linkname,
 939  947                      sizeof (ddp->dd_linkname));
 940  948                  VERIFY(mod_hash_insert(i_dls_devnet_id_hash,
 941  949                      (mod_hash_key_t)(uintptr_t)linkid,
 942  950                      (mod_hash_val_t)ddp) == 0);
 943  951                  devnet_need_rebuild = B_TRUE;
 944  952                  stat_create = B_TRUE;
 945      -                mutex_enter(&ddp->dd_mutex);
 946      -                if (!ddp->dd_prop_loaded && (ddp->dd_prop_taskid == 0)) {
 947      -                        ddp->dd_prop_taskid = taskq_dispatch(system_taskq,
 948      -                            dls_devnet_prop_task, ddp, TQ_SLEEP);
 949      -                }
 950      -                mutex_exit(&ddp->dd_mutex);
 951  953          }
 952  954          err = 0;
 953  955  done:
 954  956          /*
 955  957           * It is safe to drop the i_dls_devnet_lock at this point. In the case
 956  958           * of physical devices, the softmac framework will fail the device
 957  959           * detach based on the smac_state or smac_hold_cnt. Other cases like
 958  960           * vnic and aggr use their own scheme to serialize creates and deletes
 959  961           * and ensure that *ddp is valid.
 960  962           */
 961  963          rw_exit(&i_dls_devnet_lock);
      964 +
      965 +        if (err == 0 && zoneid != GLOBAL_ZONEID) {
      966 +                /*
      967 +                 * If this link is being created directly within a non-global
      968 +                 * zone, then flag it as transient so that it will be cleaned
      969 +                 * up when the zone is shut down.
      970 +                 */
      971 +                err = i_dls_devnet_setzid(ddp, zoneid, B_FALSE, B_TRUE);
      972 +                if (err != 0) {
      973 +                        /*
      974 +                         * At this point the link is marked as
      975 +                         * DD_INITIALIZING -- there can be no
      976 +                         * outstanding temp refs and therefore no need
      977 +                         * to wait for them.
      978 +                         */
      979 +                        ASSERT(ddp->dd_flags & DD_INITIALIZING);
      980 +                        (void) dls_devnet_unset(mh, &linkid, B_FALSE);
      981 +                        return (err);
      982 +                }
      983 +        }
      984 +
 962  985          if (err == 0) {
 963  986                  if (zoneid != GLOBAL_ZONEID &&
 964  987                      (err = i_dls_devnet_setzid(ddp, zoneid, B_FALSE,
 965  988                      B_FALSE)) != 0) {
 966  989                          /*
 967  990                           * At this point the link is marked as
 968  991                           * DD_INITIALIZING -- there can be no
 969  992                           * outstanding temp refs and therefore no need
 970  993                           * to wait for them.
 971  994                           */
 972  995                          ASSERT(ddp->dd_flags & DD_INITIALIZING);
 973  996                          (void) dls_devnet_unset(mh, &linkid, B_FALSE);
 974  997                          return (err);
 975  998                  }
 976  999  
 977 1000                  /*
 978 1001                   * The kstat subsystem holds its own locks (rather perimeter)
  
    | 
      ↓ open down ↓ | 
    7 lines elided | 
    
      ↑ open up ↑ | 
  
 979 1002                   * before calling the ks_update (dls_devnet_stat_update) entry
 980 1003                   * point which in turn grabs the i_dls_devnet_lock. So the
 981 1004                   * lock hierarchy is kstat locks -> i_dls_devnet_lock.
 982 1005                   */
 983 1006                  if (stat_create)
 984 1007                          dls_devnet_stat_create(ddp, zoneid, zoneid);
 985 1008                  if (ddpp != NULL)
 986 1009                          *ddpp = ddp;
 987 1010  
 988 1011                  mutex_enter(&ddp->dd_mutex);
 989      -                if (linkid != DATALINK_INVALID_LINKID && !ddp->dd_prop_loaded &&
 990      -                    ddp->dd_prop_taskid == TASKQID_INVALID) {
     1012 +                if (linkid != DATALINK_INVALID_LINKID &&
     1013 +                    !ddp->dd_prop_loaded && ddp->dd_prop_taskid == 0) {
 991 1014                          ddp->dd_prop_taskid = taskq_dispatch(system_taskq,
 992 1015                              dls_devnet_prop_task, ddp, TQ_SLEEP);
 993 1016                  }
 994 1017                  mutex_exit(&ddp->dd_mutex);
 995 1018  
 996 1019          }
 997 1020          return (err);
 998 1021  }
 999 1022  
1000 1023  /*
1001 1024   * Disassociate the linkid from the link identified by macname. If
1002 1025   * wait is B_TRUE, wait until all temporary refs are released and the
1003 1026   * prop task is finished.
1004 1027   *
1005 1028   * If waiting then you SHOULD NOT call this from inside the MAC perim
1006 1029   * as deadlock will ensue. Otherwise, this function is safe to call
1007 1030   * from inside or outside the MAC perim.
1008 1031   */
1009 1032  static int
1010 1033  dls_devnet_unset(mac_handle_t mh, datalink_id_t *id, boolean_t wait)
1011 1034  {
1012 1035          const char      *macname = mac_name(mh);
1013 1036          dls_devnet_t    *ddp;
1014 1037          int             err;
1015 1038          mod_hash_val_t  val;
1016 1039  
1017 1040          rw_enter(&i_dls_devnet_lock, RW_WRITER);
1018 1041          if ((err = mod_hash_find(i_dls_devnet_hash,
1019 1042              (mod_hash_key_t)macname, (mod_hash_val_t *)&ddp)) != 0) {
1020 1043                  ASSERT(err == MH_ERR_NOTFOUND);
1021 1044                  rw_exit(&i_dls_devnet_lock);
1022 1045                  return (ENOENT);
1023 1046          }
1024 1047  
1025 1048          mutex_enter(&ddp->dd_mutex);
1026 1049  
1027 1050          /*
1028 1051           * Make sure downcalls into softmac_create or softmac_destroy from
1029 1052           * devfs don't cv_wait on any devfs related condition for fear of
1030 1053           * deadlock. Return EBUSY if the asynchronous thread started for
1031 1054           * property loading as part of the post attach hasn't yet completed.
1032 1055           */
1033 1056          VERIFY(ddp->dd_ref != 0);
1034 1057          if ((ddp->dd_ref != 1) || (!wait &&
1035 1058              (ddp->dd_tref != 0 || ddp->dd_prop_taskid != 0))) {
1036 1059                  int zstatus = 0;
1037 1060  
1038 1061                  /*
1039 1062                   * There are a couple of alternatives that might be going on
1040 1063                   * here; a) the zone is shutting down and it has a transient
  
    | 
      ↓ open down ↓ | 
    40 lines elided | 
    
      ↑ open up ↑ | 
  
1041 1064                   * link assigned, in which case we want to clean it up instead
1042 1065                   * of moving it back to the global zone, or b) it's possible
1043 1066                   * that we're trying to clean up an orphaned vnic that was
1044 1067                   * delegated to a zone and which wasn't cleaned up properly
1045 1068                   * when the zone went away.  Check for either of these cases
1046 1069                   * before we simply return EBUSY.
1047 1070                   *
1048 1071                   * zstatus indicates which situation we are dealing with:
1049 1072                   *       0 - means return EBUSY
1050 1073                   *       1 - means case (a), cleanup transient link
1051      -                 *      -1 - means case (b), orphained VNIC
     1074 +                 *      -1 - means case (b), orphaned VNIC
1052 1075                   */
1053 1076                  if (ddp->dd_ref > 1 && ddp->dd_zid != GLOBAL_ZONEID) {
1054 1077                          zone_t  *zp;
1055 1078  
1056 1079                          if ((zp = zone_find_by_id(ddp->dd_zid)) == NULL) {
1057 1080                                  zstatus = -1;
1058 1081                          } else {
1059 1082                                  if (ddp->dd_transient) {
1060 1083                                          zone_status_t s = zone_status_get(zp);
1061 1084  
1062 1085                                          if (s >= ZONE_IS_SHUTTING_DOWN)
1063 1086                                                  zstatus = 1;
1064 1087                                  }
1065 1088                                  zone_rele(zp);
1066 1089                          }
1067 1090                  }
  
    | 
      ↓ open down ↓ | 
    6 lines elided | 
    
      ↑ open up ↑ | 
  
1068 1091  
1069 1092                  if (zstatus == 0) {
1070 1093                          mutex_exit(&ddp->dd_mutex);
1071 1094                          rw_exit(&i_dls_devnet_lock);
1072 1095                          return (EBUSY);
1073 1096                  }
1074 1097  
1075 1098                  /*
1076 1099                   * We want to delete the link, reset ref to 1;
1077 1100                   */
1078      -                if (zstatus == -1)
     1101 +                if (zstatus == -1) {
1079 1102                          /* Log a warning, but continue in this case */
1080 1103                          cmn_err(CE_WARN, "clear orphaned datalink: %s\n",
1081 1104                              ddp->dd_linkname);
     1105 +                }
1082 1106                  ddp->dd_ref = 1;
1083 1107          }
1084 1108  
1085 1109          ddp->dd_flags |= DD_CONDEMNED;
1086 1110          ddp->dd_ref--;
1087 1111          *id = ddp->dd_linkid;
1088 1112  
1089 1113          /*
1090 1114           * Remove this dls_devnet_t from the hash table.
1091 1115           */
1092 1116          VERIFY(mod_hash_remove(i_dls_devnet_hash,
1093 1117              (mod_hash_key_t)ddp->dd_mac, &val) == 0);
1094 1118  
1095 1119          if (ddp->dd_linkid != DATALINK_INVALID_LINKID) {
1096 1120                  VERIFY(mod_hash_remove(i_dls_devnet_id_hash,
1097 1121                      (mod_hash_key_t)(uintptr_t)ddp->dd_linkid, &val) == 0);
1098 1122  
1099 1123                  devnet_need_rebuild = B_TRUE;
1100 1124          }
1101 1125          rw_exit(&i_dls_devnet_lock);
1102 1126  
1103 1127          /*
  
    | 
      ↓ open down ↓ | 
    12 lines elided | 
    
      ↑ open up ↑ | 
  
1104 1128           * It is important to call i_dls_devnet_setzid() WITHOUT the
1105 1129           * i_dls_devnet_lock held. The setzid call grabs the MAC
1106 1130           * perim; thus causing DLS -> MAC lock ordering if performed
1107 1131           * with the i_dls_devnet_lock held. This forces consumers to
1108 1132           * grab the MAC perim before calling dls_devnet_unset() (the
1109 1133           * locking rules state MAC -> DLS order). By performing the
1110 1134           * setzid outside of the i_dls_devnet_lock consumers can
1111 1135           * safely call dls_devnet_unset() outside the MAC perim.
1112 1136           */
1113 1137          if (ddp->dd_zid != GLOBAL_ZONEID) {
     1138 +                /*
     1139 +                 * We need to release the dd_mutex before we try and destroy the
     1140 +                 * stat. When we destroy it, we'll need to grab the lock for the
     1141 +                 * kstat but if there's a concurrent reader of the kstat, we'll
     1142 +                 * be blocked on it. This will lead to deadlock because these
     1143 +                 * kstats employ a ks_update function (dls_devnet_stat_update)
     1144 +                 * which needs the dd_mutex that we currently hold.
     1145 +                 *
     1146 +                 * Because we've already flagged the dls_devnet_t as
     1147 +                 * DD_CONDEMNED and removed it from the devnet hashes above, no
     1148 +                 * new lookup can find it, so we can safely release dd_mutex.
     1149 +                 */
     1150 +                mutex_exit(&ddp->dd_mutex);
1114 1151                  dls_devnet_stat_destroy(ddp, ddp->dd_zid);
     1152 +                mutex_enter(&ddp->dd_mutex);
1115 1153                  (void) i_dls_devnet_setzid(ddp, GLOBAL_ZONEID, B_FALSE,
1116 1154                      B_FALSE);
1117 1155          }
1118 1156  
1119 1157          if (wait) {
1120 1158                  /*
1121 1159                   * Wait until all temporary references are released.
1122 1160                   * The holders of the tref need the MAC perim to
1123 1161                   * perform their work and release the tref. To avoid
1124 1162                   * deadlock, assert that the perim is never held here.
1125 1163                   */
1126 1164                  ASSERT0(MAC_PERIM_HELD(mh));
1127 1165                  while ((ddp->dd_tref != 0) || (ddp->dd_prop_taskid != 0))
1128 1166                          cv_wait(&ddp->dd_cv, &ddp->dd_mutex);
1129 1167          } else {
1130 1168                  VERIFY(ddp->dd_tref == 0);
1131      -                VERIFY(ddp->dd_prop_taskid == (taskqid_t)NULL);
     1169 +                VERIFY(ddp->dd_prop_taskid == 0);
1132 1170          }
1133 1171  
1134 1172          if (ddp->dd_linkid != DATALINK_INVALID_LINKID) {
1135 1173                  dls_devnet_stat_destroy(ddp, ddp->dd_owner_zid);
1136 1174          }
1137 1175  
1138 1176          ddp->dd_prop_loaded = B_FALSE;
1139 1177          ddp->dd_linkid = DATALINK_INVALID_LINKID;
1140 1178          ddp->dd_flags = 0;
1141 1179          mutex_exit(&ddp->dd_mutex);
1142 1180          kmem_cache_free(i_dls_devnet_cachep, ddp);
1143 1181  
1144 1182          return (0);
1145 1183  }
1146 1184  
1147 1185  /*
1148 1186   * This is a private hold routine used when we already have the dls_link_t, thus
1149 1187   * we know that it cannot go away.
1150 1188   */
1151 1189  int
1152 1190  dls_devnet_hold_tmp_by_link(dls_link_t *dlp, dls_dl_handle_t *ddhp)
1153 1191  {
1154 1192          int err;
1155 1193          dls_devnet_t *ddp = NULL;
  
    | 
      ↓ open down ↓ | 
    14 lines elided | 
    
      ↑ open up ↑ | 
  
1156 1194  
1157 1195          rw_enter(&i_dls_devnet_lock, RW_WRITER);
1158 1196          if ((err = mod_hash_find(i_dls_devnet_hash,
1159 1197              (mod_hash_key_t)dlp->dl_name, (mod_hash_val_t *)&ddp)) != 0) {
1160 1198                  ASSERT(err == MH_ERR_NOTFOUND);
1161 1199                  rw_exit(&i_dls_devnet_lock);
1162 1200                  return (ENOENT);
1163 1201          }
1164 1202  
1165 1203          mutex_enter(&ddp->dd_mutex);
1166      -        ASSERT(ddp->dd_ref > 0);
1167      -        if (ddp->dd_flags & DD_CONDEMNED) {
     1204 +        VERIFY(ddp->dd_ref > 0);
     1205 +        if (DD_NOT_VISIBLE(ddp->dd_flags)) {
1168 1206                  mutex_exit(&ddp->dd_mutex);
1169 1207                  rw_exit(&i_dls_devnet_lock);
1170 1208                  return (ENOENT);
1171 1209          }
1172 1210          ddp->dd_tref++;
1173 1211          mutex_exit(&ddp->dd_mutex);
1174 1212          rw_exit(&i_dls_devnet_lock);
1175 1213  
1176 1214          *ddhp = ddp;
1177 1215          return (0);
1178 1216  }
1179 1217  
1180 1218  static int
1181 1219  dls_devnet_hold_common(datalink_id_t linkid, dls_devnet_t **ddpp,
1182 1220      boolean_t tmp_hold)
1183 1221  {
1184 1222          dls_devnet_t            *ddp;
1185 1223          int                     err;
1186 1224  
1187 1225          rw_enter(&i_dls_devnet_lock, RW_READER);
1188 1226          if ((err = mod_hash_find(i_dls_devnet_id_hash,
1189 1227              (mod_hash_key_t)(uintptr_t)linkid, (mod_hash_val_t *)&ddp)) != 0) {
1190 1228                  ASSERT(err == MH_ERR_NOTFOUND);
1191 1229                  rw_exit(&i_dls_devnet_lock);
1192 1230                  return (ENOENT);
1193 1231          }
1194 1232  
1195 1233          mutex_enter(&ddp->dd_mutex);
1196 1234          VERIFY(ddp->dd_ref > 0);
1197 1235          if (DD_NOT_VISIBLE(ddp->dd_flags)) {
1198 1236                  mutex_exit(&ddp->dd_mutex);
1199 1237                  rw_exit(&i_dls_devnet_lock);
1200 1238                  return (ENOENT);
1201 1239          }
1202 1240          if (tmp_hold)
1203 1241                  ddp->dd_tref++;
1204 1242          else
1205 1243                  ddp->dd_ref++;
1206 1244          mutex_exit(&ddp->dd_mutex);
1207 1245          rw_exit(&i_dls_devnet_lock);
1208 1246  
1209 1247          *ddpp = ddp;
1210 1248          return (0);
1211 1249  }
1212 1250  
1213 1251  int
1214 1252  dls_devnet_hold(datalink_id_t linkid, dls_devnet_t **ddpp)
1215 1253  {
1216 1254          return (dls_devnet_hold_common(linkid, ddpp, B_FALSE));
1217 1255  }
1218 1256  
1219 1257  /*
1220 1258   * Hold the vanity naming structure (dls_devnet_t) temporarily.  The request to
1221 1259   * delete the dls_devnet_t will wait until the temporary reference is released.
1222 1260   */
1223 1261  int
1224 1262  dls_devnet_hold_tmp(datalink_id_t linkid, dls_devnet_t **ddpp)
1225 1263  {
1226 1264          return (dls_devnet_hold_common(linkid, ddpp, B_TRUE));
1227 1265  }
1228 1266  
1229 1267  /*
1230 1268   * This function is called when a DLS client tries to open a device node.
1231 1269   * This dev_t could be a result of a /dev/net node access (returned by
1232 1270   * devnet_create_rvp->dls_devnet_open()) or a direct /dev node access.
1233 1271   * In both cases, this function bumps up the reference count of the
1234 1272   * dls_devnet_t structure. The reference is held as long as the device node
1235 1273   * is open. In the case of /dev/net while it is true that the initial reference
1236 1274   * is held when the devnet_create_rvp->dls_devnet_open call happens, this
1237 1275   * initial reference is released immediately in devnet_inactive_callback ->
1238 1276   * dls_devnet_close(). (Note that devnet_inactive_callback() is called right
1239 1277   * after dld_open completes, not when the /dev/net node is being closed).
1240 1278   * To undo this function, call dls_devnet_rele()
1241 1279   */
1242 1280  int
1243 1281  dls_devnet_hold_by_dev(dev_t dev, dls_dl_handle_t *ddhp)
1244 1282  {
1245 1283          char                    name[MAXNAMELEN];
1246 1284          char                    *drv;
1247 1285          dls_devnet_t            *ddp;
1248 1286          int                     err;
1249 1287  
1250 1288          if ((drv = ddi_major_to_name(getmajor(dev))) == NULL)
1251 1289                  return (EINVAL);
1252 1290  
1253 1291          (void) snprintf(name, sizeof (name), "%s%d", drv,
1254 1292              DLS_MINOR2INST(getminor(dev)));
1255 1293  
1256 1294          rw_enter(&i_dls_devnet_lock, RW_READER);
1257 1295          if ((err = mod_hash_find(i_dls_devnet_hash,
1258 1296              (mod_hash_key_t)name, (mod_hash_val_t *)&ddp)) != 0) {
1259 1297                  ASSERT(err == MH_ERR_NOTFOUND);
1260 1298                  rw_exit(&i_dls_devnet_lock);
1261 1299                  return (ENOENT);
1262 1300          }
1263 1301          mutex_enter(&ddp->dd_mutex);
1264 1302          VERIFY(ddp->dd_ref > 0);
1265 1303          if (DD_NOT_VISIBLE(ddp->dd_flags)) {
1266 1304                  mutex_exit(&ddp->dd_mutex);
1267 1305                  rw_exit(&i_dls_devnet_lock);
1268 1306                  return (ENOENT);
1269 1307          }
1270 1308          ddp->dd_ref++;
1271 1309          mutex_exit(&ddp->dd_mutex);
1272 1310          rw_exit(&i_dls_devnet_lock);
1273 1311  
1274 1312          *ddhp = ddp;
1275 1313          return (0);
1276 1314  }
1277 1315  
1278 1316  void
1279 1317  dls_devnet_rele(dls_devnet_t *ddp)
1280 1318  {
1281 1319          mutex_enter(&ddp->dd_mutex);
1282 1320          VERIFY(ddp->dd_ref > 1);
1283 1321          ddp->dd_ref--;
1284 1322          if ((ddp->dd_flags & DD_IMPLICIT_IPTUN) && ddp->dd_ref == 1) {
1285 1323                  mutex_exit(&ddp->dd_mutex);
1286 1324                  if (i_dls_devnet_destroy_iptun(ddp->dd_linkid) != 0)
1287 1325                          ddp->dd_flags |= DD_IMPLICIT_IPTUN;
1288 1326                  return;
1289 1327          }
1290 1328          mutex_exit(&ddp->dd_mutex);
1291 1329  }
1292 1330  
1293 1331  static int
1294 1332  dls_devnet_hold_by_name(const char *link, dls_devnet_t **ddpp, zoneid_t zid)
1295 1333  {
1296 1334          char                    drv[MAXLINKNAMELEN];
1297 1335          uint_t                  ppa;
1298 1336          major_t                 major;
1299 1337          dev_t                   phy_dev, tmp_dev;
1300 1338          datalink_id_t           linkid;
1301 1339          dls_dev_handle_t        ddh;
1302 1340          int                     err;
1303 1341  
1304 1342          if ((err = dls_mgmt_get_linkid_in_zone(link, &linkid, zid)) == 0)
1305 1343                  return (dls_devnet_hold(linkid, ddpp));
1306 1344  
1307 1345          /*
1308 1346           * If we failed to get the link's linkid because the dlmgmtd daemon
1309 1347           * has not been started, return ENOENT so that the application can
1310 1348           * fallback to open the /dev node.
1311 1349           */
1312 1350          if (err == EBADF)
1313 1351                  return (ENOENT);
1314 1352  
1315 1353          if (err != ENOENT)
1316 1354                  return (err);
1317 1355  
1318 1356          /*
1319 1357           * If we reach this point it means dlmgmtd is up but has no
1320 1358           * mapping for the link name.
1321 1359           */
1322 1360          if (ddi_parse(link, drv, &ppa) != DDI_SUCCESS)
1323 1361                  return (ENOENT);
1324 1362  
1325 1363          if (IS_IPTUN_LINK(drv)) {
1326 1364                  if ((err = i_dls_devnet_create_iptun(link, drv, &linkid)) != 0)
1327 1365                          return (err);
1328 1366                  /*
1329 1367                   * At this point, an IP tunnel MAC has registered, which
1330 1368                   * resulted in a link being created.
1331 1369                   */
1332 1370                  err = dls_devnet_hold(linkid, ddpp);
1333 1371                  if (err != 0) {
1334 1372                          VERIFY(i_dls_devnet_destroy_iptun(linkid) == 0);
1335 1373                          return (err);
1336 1374                  }
1337 1375                  /*
1338 1376                   * dls_devnet_rele() will know to destroy the implicit IP
1339 1377                   * tunnel on last reference release if DD_IMPLICIT_IPTUN is
1340 1378                   * set.
1341 1379                   */
1342 1380                  (*ddpp)->dd_flags |= DD_IMPLICIT_IPTUN;
1343 1381                  return (0);
1344 1382          }
1345 1383  
1346 1384          /*
1347 1385           * If this link:
1348 1386           * (a) is a physical device, (b) this is the first boot, (c) the MAC
1349 1387           * is not registered yet, and (d) we cannot find its linkid, then the
1350 1388           * linkname is the same as the devname.
1351 1389           *
1352 1390           * First filter out invalid names.
1353 1391           */
1354 1392          if ((major = ddi_name_to_major(drv)) == (major_t)-1)
1355 1393                  return (ENOENT);
1356 1394  
1357 1395          phy_dev = makedevice(major, DLS_PPA2MINOR(ppa));
1358 1396          if (softmac_hold_device(phy_dev, &ddh) != 0)
1359 1397                  return (ENOENT);
1360 1398  
1361 1399          /*
1362 1400           * At this time, the MAC should be registered, check its phy_dev using
1363 1401           * the given name.
1364 1402           */
1365 1403          if ((err = dls_mgmt_get_linkid(link, &linkid)) != 0 ||
1366 1404              (err = dls_mgmt_get_phydev(linkid, &tmp_dev)) != 0) {
1367 1405                  softmac_rele_device(ddh);
1368 1406                  return (err);
1369 1407          }
1370 1408          if (tmp_dev != phy_dev) {
1371 1409                  softmac_rele_device(ddh);
1372 1410                  return (ENOENT);
1373 1411          }
1374 1412  
1375 1413          err = dls_devnet_hold(linkid, ddpp);
1376 1414          softmac_rele_device(ddh);
1377 1415          return (err);
1378 1416  }
1379 1417  
1380 1418  int
1381 1419  dls_devnet_macname2linkid(const char *macname, datalink_id_t *linkidp)
1382 1420  {
1383 1421          dls_devnet_t    *ddp;
1384 1422  
1385 1423          rw_enter(&i_dls_devnet_lock, RW_READER);
1386 1424          if (mod_hash_find(i_dls_devnet_hash, (mod_hash_key_t)macname,
1387 1425              (mod_hash_val_t *)&ddp) != 0) {
1388 1426                  rw_exit(&i_dls_devnet_lock);
1389 1427                  return (ENOENT);
1390 1428          }
1391 1429  
1392 1430          *linkidp = ddp->dd_linkid;
1393 1431          rw_exit(&i_dls_devnet_lock);
1394 1432          return (0);
1395 1433  }
1396 1434  
1397 1435  /*
1398 1436   * Get linkid for the given dev.
1399 1437   */
1400 1438  int
1401 1439  dls_devnet_dev2linkid(dev_t dev, datalink_id_t *linkidp)
1402 1440  {
1403 1441          char    macname[MAXNAMELEN];
1404 1442          char    *drv;
1405 1443  
1406 1444          if ((drv = ddi_major_to_name(getmajor(dev))) == NULL)
1407 1445                  return (EINVAL);
1408 1446  
1409 1447          (void) snprintf(macname, sizeof (macname), "%s%d", drv,
1410 1448              DLS_MINOR2INST(getminor(dev)));
1411 1449          return (dls_devnet_macname2linkid(macname, linkidp));
1412 1450  }
1413 1451  
1414 1452  /*
1415 1453   * Get the link's physical dev_t. It this is a VLAN, get the dev_t of the
1416 1454   * link this VLAN is created on.
1417 1455   */
1418 1456  int
1419 1457  dls_devnet_phydev(datalink_id_t vlanid, dev_t *devp)
1420 1458  {
1421 1459          dls_devnet_t    *ddp;
1422 1460          int             err;
1423 1461  
1424 1462          if ((err = dls_devnet_hold_tmp(vlanid, &ddp)) != 0)
1425 1463                  return (err);
1426 1464  
1427 1465          err = dls_mgmt_get_phydev(ddp->dd_linkid, devp);
1428 1466          dls_devnet_rele_tmp(ddp);
1429 1467          return (err);
1430 1468  }
1431 1469  
1432 1470  /*
1433 1471   * Handle the renaming requests.  There are two rename cases:
1434 1472   *
1435 1473   * 1. Request to rename a valid link (id1) to an non-existent link name
1436 1474   *    (id2). In this case id2 is DATALINK_INVALID_LINKID.  Just check whether
1437 1475   *    id1 is held by any applications.
1438 1476   *
1439 1477   *    In this case, the link's kstats need to be updated using the given name.
1440 1478   *
1441 1479   * 2. Request to rename a valid link (id1) to the name of a REMOVED
1442 1480   *    physical link (id2). In this case, check that id1 and its associated
1443 1481   *    mac is not held by any application, and update the link's linkid to id2.
1444 1482   *
1445 1483   *    This case does not change the <link name, linkid> mapping, so the link's
1446 1484   *    kstats need to be updated with using name associated the given id2.
1447 1485   *
1448 1486   * The zoneinit parameter is used to allow us to create a VNIC in the global
1449 1487   * zone which is assigned to a non-global zone.  Since there is a race condition
1450 1488   * in the create process if two VNICs have the same name, we need to rename it
1451 1489   * after it has been assigned to the zone.
1452 1490   */
1453 1491  int
1454 1492  dls_devnet_rename(datalink_id_t id1, datalink_id_t id2, const char *link,
1455 1493      boolean_t zoneinit)
1456 1494  {
1457 1495          dls_dev_handle_t        ddh = NULL;
1458 1496          int                     err = 0;
1459 1497          dev_t                   phydev = 0;
1460 1498          dls_devnet_t            *ddp;
1461 1499          mac_perim_handle_t      mph = NULL;
1462 1500          mac_handle_t            mh;
1463 1501          mod_hash_val_t          val;
1464 1502  
1465 1503          /*
1466 1504           * In the second case, id2 must be a REMOVED physical link.
1467 1505           */
1468 1506          if ((id2 != DATALINK_INVALID_LINKID) &&
1469 1507              (dls_mgmt_get_phydev(id2, &phydev) == 0) &&
1470 1508              softmac_hold_device(phydev, &ddh) == 0) {
1471 1509                  softmac_rele_device(ddh);
1472 1510                  return (EEXIST);
1473 1511          }
1474 1512  
1475 1513          /*
1476 1514           * Hold id1 to prevent it from being detached (if a physical link).
1477 1515           */
1478 1516          if (dls_mgmt_get_phydev(id1, &phydev) == 0)
1479 1517                  (void) softmac_hold_device(phydev, &ddh);
1480 1518  
1481 1519          /*
1482 1520           * The framework does not hold hold locks across calls to the
1483 1521           * mac perimeter, hence enter the perimeter first. This also waits
1484 1522           * for the property loading to finish.
1485 1523           */
1486 1524          if ((err = mac_perim_enter_by_linkid(id1, &mph)) != 0) {
1487 1525                  softmac_rele_device(ddh);
1488 1526                  return (err);
1489 1527          }
1490 1528  
1491 1529          rw_enter(&i_dls_devnet_lock, RW_WRITER);
1492 1530          if ((err = mod_hash_find(i_dls_devnet_id_hash,
1493 1531              (mod_hash_key_t)(uintptr_t)id1, (mod_hash_val_t *)&ddp)) != 0) {
1494 1532                  ASSERT(err == MH_ERR_NOTFOUND);
1495 1533                  err = ENOENT;
1496 1534                  goto done;
1497 1535          }
1498 1536  
1499 1537          mutex_enter(&ddp->dd_mutex);
1500 1538          if (!zoneinit) {
1501 1539                  if (ddp->dd_ref > 1) {
1502 1540                          mutex_exit(&ddp->dd_mutex);
1503 1541                          err = EBUSY;
1504 1542                          goto done;
1505 1543                  }
1506 1544          }
1507 1545          mutex_exit(&ddp->dd_mutex);
1508 1546  
1509 1547          if (id2 == DATALINK_INVALID_LINKID) {
1510 1548                  (void) strlcpy(ddp->dd_linkname, link,
1511 1549                      sizeof (ddp->dd_linkname));
1512 1550  
1513 1551                  /* rename mac client name and its flow if exists */
1514 1552                  if ((err = mac_open(ddp->dd_mac, &mh)) != 0)
1515 1553                          goto done;
1516 1554                  if (zoneinit) {
1517 1555                          char tname[MAXLINKNAMELEN];
1518 1556  
1519 1557                          (void) snprintf(tname, sizeof (tname), "z%d_%s",
1520 1558                              ddp->dd_zid, link);
1521 1559                          (void) mac_rename_primary(mh, tname);
1522 1560                  } else {
1523 1561                          (void) mac_rename_primary(mh, link);
1524 1562                  }
1525 1563                  mac_close(mh);
1526 1564                  goto done;
1527 1565          }
1528 1566  
1529 1567          /*
1530 1568           * The second case, check whether the MAC is used by any MAC
1531 1569           * user.  This must be a physical link so ddh must not be NULL.
1532 1570           */
1533 1571          if (ddh == NULL) {
1534 1572                  err = EINVAL;
1535 1573                  goto done;
1536 1574          }
1537 1575  
1538 1576          if ((err = mac_open(ddp->dd_mac, &mh)) != 0)
1539 1577                  goto done;
1540 1578  
1541 1579          /*
1542 1580           * We release the reference of the MAC which mac_open() is
1543 1581           * holding. Note that this mac will not be unregistered
1544 1582           * because the physical device is held.
1545 1583           */
1546 1584          mac_close(mh);
1547 1585  
1548 1586          /*
1549 1587           * Check if there is any other MAC clients, if not, hold this mac
1550 1588           * exclusively until we are done.
1551 1589           */
1552 1590          if ((err = mac_mark_exclusive(mh)) != 0)
1553 1591                  goto done;
1554 1592  
1555 1593          /*
1556 1594           * Update the link's linkid.
1557 1595           */
1558 1596          if ((err = mod_hash_find(i_dls_devnet_id_hash,
1559 1597              (mod_hash_key_t)(uintptr_t)id2, &val)) != MH_ERR_NOTFOUND) {
1560 1598                  mac_unmark_exclusive(mh);
1561 1599                  err = EEXIST;
1562 1600                  goto done;
1563 1601          }
1564 1602  
1565 1603          err = dls_mgmt_get_linkinfo(id2, ddp->dd_linkname, NULL, NULL, NULL);
1566 1604          if (err != 0) {
1567 1605                  mac_unmark_exclusive(mh);
1568 1606                  goto done;
1569 1607          }
1570 1608  
1571 1609          (void) mod_hash_remove(i_dls_devnet_id_hash,
1572 1610              (mod_hash_key_t)(uintptr_t)id1, &val);
1573 1611  
1574 1612          ddp->dd_linkid = id2;
1575 1613          (void) mod_hash_insert(i_dls_devnet_id_hash,
1576 1614              (mod_hash_key_t)(uintptr_t)ddp->dd_linkid, (mod_hash_val_t)ddp);
1577 1615  
1578 1616          mac_unmark_exclusive(mh);
1579 1617  
1580 1618          /* load properties for new id */
1581 1619          mutex_enter(&ddp->dd_mutex);
1582 1620          ddp->dd_prop_loaded = B_FALSE;
1583 1621          ddp->dd_prop_taskid = taskq_dispatch(system_taskq,
1584 1622              dls_devnet_prop_task, ddp, TQ_SLEEP);
1585 1623          mutex_exit(&ddp->dd_mutex);
1586 1624  
1587 1625  done:
1588 1626          rw_exit(&i_dls_devnet_lock);
1589 1627  
1590 1628          if (err == 0)
1591 1629                  dls_devnet_stat_rename(ddp, zoneinit);
1592 1630  
1593 1631          if (mph != NULL)
1594 1632                  mac_perim_exit(mph);
1595 1633          softmac_rele_device(ddh);
1596 1634          return (err);
1597 1635  }
1598 1636  
1599 1637  static int
1600 1638  i_dls_devnet_setzid(dls_devnet_t *ddp, zoneid_t new_zoneid, boolean_t setprop,
1601 1639      boolean_t transient)
1602 1640  {
1603 1641          int                     err;
1604 1642          mac_perim_handle_t      mph;
1605 1643          boolean_t               upcall_done = B_FALSE;
1606 1644          datalink_id_t           linkid = ddp->dd_linkid;
1607 1645          zoneid_t                old_zoneid = ddp->dd_zid;
1608 1646          dlmgmt_door_setzoneid_t setzid;
1609 1647          dlmgmt_setzoneid_retval_t retval;
1610 1648  
1611 1649          if (old_zoneid == new_zoneid)
1612 1650                  return (0);
1613 1651  
1614 1652          if ((err = mac_perim_enter_by_macname(ddp->dd_mac, &mph)) != 0)
1615 1653                  return (err);
1616 1654  
1617 1655          /*
1618 1656           * When changing the zoneid of an existing link, we need to tell
1619 1657           * dlmgmtd about it.  dlmgmtd already knows the zoneid associated with
1620 1658           * newly created links.
1621 1659           */
1622 1660          if (setprop) {
1623 1661                  setzid.ld_cmd = DLMGMT_CMD_SETZONEID;
1624 1662                  setzid.ld_linkid = linkid;
1625 1663                  setzid.ld_zoneid = new_zoneid;
1626 1664                  err = i_dls_mgmt_upcall(&setzid, sizeof (setzid), &retval,
1627 1665                      sizeof (retval));
1628 1666                  if (err != 0)
1629 1667                          goto done;
1630 1668  
1631 1669                  /*
1632 1670                   * We set upcall_done only if the upcall is
1633 1671                   * successful. This way, if dls_link_setzid() fails,
1634 1672                   * we know another upcall must be done to reset the
1635 1673                   * dlmgmtd state.
1636 1674                   */
1637 1675                  upcall_done = B_TRUE;
1638 1676          }
1639 1677          if ((err = dls_link_setzid(ddp->dd_mac, new_zoneid)) == 0) {
1640 1678                  ddp->dd_zid = new_zoneid;
1641 1679                  ddp->dd_transient = transient;
1642 1680                  devnet_need_rebuild = B_TRUE;
1643 1681          }
1644 1682  
1645 1683  done:
1646 1684          if (err != 0 && upcall_done) {
1647 1685                  setzid.ld_zoneid = old_zoneid;
1648 1686                  (void) i_dls_mgmt_upcall(&setzid, sizeof (setzid), &retval,
1649 1687                      sizeof (retval));
1650 1688          }
1651 1689          mac_perim_exit(mph);
1652 1690          return (err);
1653 1691  }
1654 1692  
1655 1693  int
1656 1694  dls_devnet_setzid(dls_dl_handle_t ddh, zoneid_t new_zid, boolean_t transient)
1657 1695  {
1658 1696          dls_devnet_t    *ddp;
1659 1697          int             err;
1660 1698          zoneid_t        old_zid;
1661 1699          boolean_t       refheld = B_FALSE;
1662 1700  
1663 1701          old_zid = ddh->dd_zid;
1664 1702  
1665 1703          if (old_zid == new_zid)
1666 1704                  return (0);
1667 1705  
1668 1706          /*
1669 1707           * Acquire an additional reference to the link if it is being assigned
1670 1708           * to a non-global zone from the global zone.
1671 1709           */
1672 1710          if (old_zid == GLOBAL_ZONEID && new_zid != GLOBAL_ZONEID) {
1673 1711                  if ((err = dls_devnet_hold(ddh->dd_linkid, &ddp)) != 0)
1674 1712                          return (err);
1675 1713                  refheld = B_TRUE;
1676 1714          }
1677 1715  
1678 1716          if ((err = i_dls_devnet_setzid(ddh, new_zid, B_TRUE, transient)) != 0) {
1679 1717                  if (refheld)
1680 1718                          dls_devnet_rele(ddp);
1681 1719                  return (err);
1682 1720          }
1683 1721  
1684 1722          /*
1685 1723           * Release the additional reference if the link is returning to the
1686 1724           * global zone from a non-global zone.
1687 1725           */
1688 1726          if (old_zid != GLOBAL_ZONEID && new_zid == GLOBAL_ZONEID)
1689 1727                  dls_devnet_rele(ddh);
1690 1728  
1691 1729          /* Re-create kstats in the appropriate zones. */
1692 1730          if (old_zid != GLOBAL_ZONEID)
1693 1731                  dls_devnet_stat_destroy(ddh, old_zid);
1694 1732          if (new_zid != GLOBAL_ZONEID)
1695 1733                  dls_devnet_stat_create(ddh, new_zid, new_zid);
1696 1734  
1697 1735          return (0);
1698 1736  }
1699 1737  
1700 1738  zoneid_t
1701 1739  dls_devnet_getzid(dls_dl_handle_t ddh)
1702 1740  {
1703 1741          return (((dls_devnet_t *)ddh)->dd_zid);
1704 1742  }
1705 1743  
1706 1744  zoneid_t
1707 1745  dls_devnet_getownerzid(dls_dl_handle_t ddh)
1708 1746  {
1709 1747          return (((dls_devnet_t *)ddh)->dd_owner_zid);
1710 1748  }
1711 1749  
1712 1750  /*
1713 1751   * Is linkid visible from zoneid?  A link is visible if it was created in the
1714 1752   * zone, or if it is currently assigned to the zone.
1715 1753   */
1716 1754  boolean_t
1717 1755  dls_devnet_islinkvisible(datalink_id_t linkid, zoneid_t zoneid)
1718 1756  {
1719 1757          dls_devnet_t    *ddp;
1720 1758          boolean_t       result;
1721 1759  
1722 1760          if (dls_devnet_hold_tmp(linkid, &ddp) != 0)
1723 1761                  return (B_FALSE);
1724 1762          result = (ddp->dd_owner_zid == zoneid || ddp->dd_zid == zoneid);
1725 1763          dls_devnet_rele_tmp(ddp);
1726 1764          return (result);
1727 1765  }
1728 1766  
1729 1767  /*
1730 1768   * Access a vanity naming node.
1731 1769   */
1732 1770  int
1733 1771  dls_devnet_open_in_zone(const char *link, dls_dl_handle_t *dhp, dev_t *devp,
1734 1772      zoneid_t zid)
1735 1773  {
1736 1774          dls_devnet_t    *ddp;
1737 1775          dls_link_t      *dlp;
1738 1776          zoneid_t        czid = getzoneid();
1739 1777          int             err;
1740 1778          mac_perim_handle_t      mph;
1741 1779  
1742 1780          if (czid != GLOBAL_ZONEID && czid != zid)
1743 1781                  return (ENOENT);
1744 1782  
1745 1783          if ((err = dls_devnet_hold_by_name(link, &ddp, zid)) != 0)
1746 1784                  return (err);
1747 1785  
1748 1786          dls_devnet_prop_task_wait(ddp);
1749 1787  
1750 1788          /*
1751 1789           * Opening a link that does not belong to the current non-global zone
1752 1790           * is not allowed.
1753 1791           */
1754 1792          if (zid != GLOBAL_ZONEID && ddp->dd_zid != zid) {
1755 1793                  dls_devnet_rele(ddp);
1756 1794                  return (ENOENT);
1757 1795          }
1758 1796  
1759 1797          err = mac_perim_enter_by_macname(ddp->dd_mac, &mph);
1760 1798          if (err != 0) {
1761 1799                  dls_devnet_rele(ddp);
1762 1800                  return (err);
1763 1801          }
1764 1802  
1765 1803          err = dls_link_hold_create(ddp->dd_mac, &dlp);
1766 1804          mac_perim_exit(mph);
1767 1805  
1768 1806          if (err != 0) {
1769 1807                  dls_devnet_rele(ddp);
1770 1808                  return (err);
1771 1809          }
1772 1810  
1773 1811          *dhp = ddp;
1774 1812          *devp = dls_link_dev(dlp);
1775 1813          return (0);
1776 1814  }
1777 1815  
1778 1816  int
1779 1817  dls_devnet_open(const char *link, dls_dl_handle_t *dhp, dev_t *devp)
1780 1818  {
1781 1819          return (dls_devnet_open_in_zone(link, dhp, devp, getzoneid()));
1782 1820  }
1783 1821  
1784 1822  /*
1785 1823   * Close access to a vanity naming node.
1786 1824   */
1787 1825  void
1788 1826  dls_devnet_close(dls_dl_handle_t dlh)
1789 1827  {
1790 1828          dls_devnet_t    *ddp = dlh;
1791 1829          dls_link_t      *dlp;
1792 1830          mac_perim_handle_t      mph;
1793 1831  
1794 1832          VERIFY(mac_perim_enter_by_macname(ddp->dd_mac, &mph) == 0);
1795 1833          VERIFY(dls_link_hold(ddp->dd_mac, &dlp) == 0);
1796 1834  
1797 1835          /*
1798 1836           * One rele for the hold placed in dls_devnet_open, another for
1799 1837           * the hold done just above
1800 1838           */
1801 1839          dls_link_rele(dlp);
1802 1840          dls_link_rele(dlp);
1803 1841          mac_perim_exit(mph);
1804 1842  
1805 1843          dls_devnet_rele(ddp);
1806 1844  }
1807 1845  
1808 1846  /*
1809 1847   * This is used by /dev/net to rebuild the nodes for readdir().  It is not
1810 1848   * critical and no protection is needed.
1811 1849   */
1812 1850  boolean_t
1813 1851  dls_devnet_rebuild()
1814 1852  {
1815 1853          boolean_t updated = devnet_need_rebuild;
1816 1854  
1817 1855          devnet_need_rebuild = B_FALSE;
1818 1856          return (updated);
1819 1857  }
1820 1858  
1821 1859  int
1822 1860  dls_devnet_create(mac_handle_t mh, datalink_id_t linkid, zoneid_t zoneid)
1823 1861  {
1824 1862          dls_link_t      *dlp;
1825 1863          dls_devnet_t    *ddp;
1826 1864          int             err;
1827 1865          mac_perim_handle_t mph;
1828 1866  
1829 1867          /*
1830 1868           * Holding the mac perimeter ensures that the downcall from the
1831 1869           * dlmgmt daemon which does the property loading does not proceed
1832 1870           * until we relinquish the perimeter.
1833 1871           */
1834 1872          mac_perim_enter_by_mh(mh, &mph);
1835 1873          /*
1836 1874           * Make this association before we call dls_link_hold_create as
1837 1875           * we need to use the linkid to get the user name for the link
1838 1876           * when we create the MAC client.
1839 1877           */
1840 1878          if ((err = dls_devnet_set(mh, linkid, zoneid, &ddp)) == 0) {
1841 1879                  if ((err = dls_link_hold_create(mac_name(mh), &dlp)) != 0) {
1842 1880                          mac_perim_exit(mph);
1843 1881                          (void) dls_devnet_unset(mh, &linkid, B_FALSE);
1844 1882                          return (err);
1845 1883                  }
1846 1884  
1847 1885                  /*
1848 1886                   * If dd_linkid is set then the link was successfully
1849 1887                   * initialized. In this case we can remove the
1850 1888                   * initializing flag and make the link visible to the
1851 1889                   * rest of the system.
1852 1890                   *
1853 1891                   * If not set then we were called by softmac and it
1854 1892                   * was unable to obtain a linkid for the physical link
1855 1893                   * because dlmgmtd is down. In that case softmac will
1856 1894                   * eventually obtain a linkid and call
1857 1895                   * dls_devnet_recreate() to complete initialization.
1858 1896                   */
1859 1897                  mutex_enter(&ddp->dd_mutex);
1860 1898                  if (ddp->dd_linkid != DATALINK_INVALID_LINKID)
1861 1899                          ddp->dd_flags &= ~DD_INITIALIZING;
1862 1900                  mutex_exit(&ddp->dd_mutex);
1863 1901  
1864 1902          }
1865 1903  
1866 1904          mac_perim_exit(mph);
1867 1905          return (err);
1868 1906  }
1869 1907  
1870 1908  /*
1871 1909   * Set the linkid of the dls_devnet_t and add it into the i_dls_devnet_id_hash.
1872 1910   * This is called in the case that the dlmgmtd daemon is started later than
1873 1911   * the physical devices get attached, and the linkid is only known after the
1874 1912   * daemon starts.
1875 1913   */
1876 1914  int
1877 1915  dls_devnet_recreate(mac_handle_t mh, datalink_id_t linkid)
1878 1916  {
1879 1917          dls_devnet_t    *ddp;
1880 1918          int             err;
1881 1919  
1882 1920          VERIFY(linkid != DATALINK_INVALID_LINKID);
1883 1921          if ((err = dls_devnet_set(mh, linkid, GLOBAL_ZONEID, &ddp)) == 0) {
1884 1922                  mutex_enter(&ddp->dd_mutex);
1885 1923                  if (ddp->dd_linkid != DATALINK_INVALID_LINKID)
1886 1924                          ddp->dd_flags &= ~DD_INITIALIZING;
1887 1925                  mutex_exit(&ddp->dd_mutex);
1888 1926          }
1889 1927  
1890 1928          return (err);
1891 1929  
1892 1930  }
1893 1931  
1894 1932  int
1895 1933  dls_devnet_destroy(mac_handle_t mh, datalink_id_t *idp, boolean_t wait)
1896 1934  {
1897 1935          int                     err;
1898 1936          mac_perim_handle_t      mph;
1899 1937  
1900 1938          *idp = DATALINK_INVALID_LINKID;
1901 1939          err = dls_devnet_unset(mh, idp, wait);
1902 1940  
1903 1941          /*
1904 1942           * We continue on in the face of ENOENT because the devnet
1905 1943           * unset and DLS link release are not atomic and we may have a
1906 1944           * scenario where there is no entry in i_dls_devnet_hash for
1907 1945           * the MAC name but there is an entry in i_dls_link_hash. For
1908 1946           * example, if the following occurred:
1909 1947           *
1910 1948           * 1. dls_devnet_unset() returns success, and
1911 1949           *
1912 1950           * 2. dls_link_rele_by_name() fails with ENOTEMPTY because
1913 1951           *    flows still exist, and
1914 1952           *
1915 1953           * 3. dls_devnet_set() fails to set the zone id and calls
1916 1954           *    dls_devnet_unset() -- leaving an entry in
1917 1955           *    i_dls_link_hash but no corresponding entry in
1918 1956           *    i_dls_devnet_hash.
1919 1957           *
1920 1958           * Even if #3 wasn't true the dls_devnet_set() may fail for
1921 1959           * different reasons in the future; the point is that it _can_
1922 1960           * fail as part of its contract. We can't rely on it working
1923 1961           * so we must assume that these two pieces of state (devnet
1924 1962           * and link hashes), which should always be in sync, can get
1925 1963           * out of sync and thus even if we get ENOENT from the devnet
1926 1964           * hash we should still try to delete from the link hash just
1927 1965           * in case.
  
    | 
      ↓ open down ↓ | 
    750 lines elided | 
    
      ↑ open up ↑ | 
  
1928 1966           *
1929 1967           * We could prevent the ENOTEMPTY from dls_link_rele_by_name()
1930 1968           * by calling mac_disable() before calling
1931 1969           * dls_devnet_destroy() but that's not currently possible due
1932 1970           * to a long-standing bug. OpenSolaris 6791335: The semantics
1933 1971           * of mac_disable() were modified by Crossbow such that
1934 1972           * dls_devnet_destroy() needs to be called before
1935 1973           * mac_disable() can succeed. This is because of the implicit
1936 1974           * reference that dls has on the mac_impl_t.
1937 1975           */
1938      -        if (err != 0 && err != ENOENT) {
     1976 +        if (err != 0 && err != ENOENT)
1939 1977                  return (err);
1940      -        }
1941 1978  
1942 1979          mac_perim_enter_by_mh(mh, &mph);
1943 1980          err = dls_link_rele_by_name(mac_name(mh));
1944 1981          if (err != 0) {
1945 1982                  dls_devnet_t    *ddp;
1946 1983  
1947 1984                  /*
1948 1985                   * XXX It is a general GLDv3 bug that dls_devnet_set() has to
1949 1986                   * be called to re-set the link when destroy fails.  The
1950 1987                   * zoneid below will be incorrect if this function is ever
1951 1988                   * called from kernel context or from a zone other than that
1952 1989                   * which initially created the link.
1953 1990                   */
1954 1991                  (void) dls_devnet_set(mh, *idp, crgetzoneid(CRED()), &ddp);
1955 1992  
1956 1993                  /*
1957 1994                   * You might think dd_linkid should always be set
1958 1995                   * here, but in the case where dls_devnet_unset()
1959 1996                   * returns ENOENT it will be DATALINK_INVALID_LINKID.
1960 1997                   * Stay consistent with the rest of DLS and only
1961 1998                   * remove the initializing flag if linkid is set.
1962 1999                   */
1963 2000                  mutex_enter(&ddp->dd_mutex);
1964 2001                  if (ddp->dd_linkid != DATALINK_INVALID_LINKID)
1965 2002                          ddp->dd_flags &= ~DD_INITIALIZING;
1966 2003                  mutex_exit(&ddp->dd_mutex);
1967 2004          }
1968 2005  
1969 2006          mac_perim_exit(mph);
1970 2007          return (err);
1971 2008  }
1972 2009  
1973 2010  /*
1974 2011   * Implicitly create an IP tunnel link.
1975 2012   */
1976 2013  static int
1977 2014  i_dls_devnet_create_iptun(const char *linkname, const char *drvname,
1978 2015      datalink_id_t *linkid)
1979 2016  {
1980 2017          int             err;
1981 2018          iptun_kparams_t ik;
1982 2019          uint32_t        media;
1983 2020          netstack_t      *ns;
1984 2021          major_t         iptun_major;
1985 2022          dev_info_t      *iptun_dip;
1986 2023  
1987 2024          /* First ensure that the iptun device is attached. */
1988 2025          if ((iptun_major = ddi_name_to_major(IPTUN_DRIVER_NAME)) == (major_t)-1)
1989 2026                  return (EINVAL);
1990 2027          if ((iptun_dip = ddi_hold_devi_by_instance(iptun_major, 0, 0)) == NULL)
1991 2028                  return (EINVAL);
1992 2029  
1993 2030          if (IS_IPV4_TUN(drvname)) {
1994 2031                  ik.iptun_kparam_type = IPTUN_TYPE_IPV4;
1995 2032                  media = DL_IPV4;
1996 2033          } else if (IS_6TO4_TUN(drvname)) {
1997 2034                  ik.iptun_kparam_type = IPTUN_TYPE_6TO4;
1998 2035                  media = DL_6TO4;
1999 2036          } else if (IS_IPV6_TUN(drvname)) {
2000 2037                  ik.iptun_kparam_type = IPTUN_TYPE_IPV6;
2001 2038                  media = DL_IPV6;
2002 2039          }
2003 2040          ik.iptun_kparam_flags = (IPTUN_KPARAM_TYPE | IPTUN_KPARAM_IMPLICIT);
2004 2041  
2005 2042          /* Obtain a datalink id for this tunnel. */
2006 2043          err = dls_mgmt_create((char *)linkname, 0, DATALINK_CLASS_IPTUN, media,
2007 2044              B_FALSE, &ik.iptun_kparam_linkid);
2008 2045          if (err != 0) {
2009 2046                  ddi_release_devi(iptun_dip);
2010 2047                  return (err);
2011 2048          }
2012 2049  
2013 2050          ns = netstack_get_current();
2014 2051          err = iptun_create(&ik, CRED());
2015 2052          netstack_rele(ns);
2016 2053  
2017 2054          if (err != 0)
2018 2055                  VERIFY(dls_mgmt_destroy(ik.iptun_kparam_linkid, B_FALSE) == 0);
2019 2056          else
2020 2057                  *linkid = ik.iptun_kparam_linkid;
2021 2058  
2022 2059          ddi_release_devi(iptun_dip);
2023 2060          return (err);
2024 2061  }
2025 2062  
2026 2063  static int
2027 2064  i_dls_devnet_destroy_iptun(datalink_id_t linkid)
2028 2065  {
2029 2066          int err;
2030 2067  
2031 2068          /*
2032 2069           * Note the use of zone_kcred() here as opposed to CRED().  This is
2033 2070           * because the process that does the last close of this /dev/net node
2034 2071           * may not have necessary privileges to delete this IP tunnel, but the
2035 2072           * tunnel must always be implicitly deleted on last close.
2036 2073           */
2037 2074          if ((err = iptun_delete(linkid, zone_kcred())) == 0)
2038 2075                  (void) dls_mgmt_destroy(linkid, B_FALSE);
2039 2076          return (err);
2040 2077  }
2041 2078  
2042 2079  const char *
2043 2080  dls_devnet_link(dls_dl_handle_t ddh)
2044 2081  {
2045 2082          return (ddh->dd_linkname);
2046 2083  }
2047 2084  
2048 2085  const char *
2049 2086  dls_devnet_mac(dls_dl_handle_t ddh)
2050 2087  {
2051 2088          return (ddh->dd_mac);
2052 2089  }
2053 2090  
2054 2091  datalink_id_t
2055 2092  dls_devnet_linkid(dls_dl_handle_t ddh)
2056 2093  {
2057 2094          return (ddh->dd_linkid);
2058 2095  }
  
    | 
      ↓ open down ↓ | 
    108 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX