Print this page
usr/src/cmd/dlmgmtd/dlmgmt_door.c

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/io/dls/dls_mgmt.c
          +++ new/usr/src/uts/common/io/dls/dls_mgmt.c
↓ open down ↓ 920 lines elided ↑ open up ↑
 921  921                  }
 922  922  
 923  923                  ASSERT(ddp->dd_flags & DD_INITIALIZING);
 924  924  
 925  925          } else {
 926  926                  ddp = kmem_cache_alloc(i_dls_devnet_cachep, KM_SLEEP);
 927  927                  ddp->dd_flags = DD_INITIALIZING;
 928  928                  ddp->dd_tref = 0;
 929  929                  ddp->dd_ref++;
 930  930                  ddp->dd_owner_zid = zoneid;
      931 +                /*
      932 +                 * If we are creating a new devnet which will be owned by a NGZ
      933 +                 * then mark it as transient. This link has never been in the
      934 +                 * GZ, the GZ will not have a hold on its reference, and we do
      935 +                 * not want to return it to the GZ when the zone halts.
      936 +                 */
      937 +                if (zoneid != GLOBAL_ZONEID)
      938 +                        ddp->dd_transient = B_TRUE;
 931  939                  (void) strlcpy(ddp->dd_mac, macname, sizeof (ddp->dd_mac));
 932  940                  VERIFY(mod_hash_insert(i_dls_devnet_hash,
 933  941                      (mod_hash_key_t)ddp->dd_mac, (mod_hash_val_t)ddp) == 0);
 934  942          }
 935  943  
 936  944          if (linkid != DATALINK_INVALID_LINKID) {
 937  945                  ddp->dd_linkid = linkid;
 938  946                  (void) strlcpy(ddp->dd_linkname, linkname,
 939  947                      sizeof (ddp->dd_linkname));
 940  948                  VERIFY(mod_hash_insert(i_dls_devnet_id_hash,
 941  949                      (mod_hash_key_t)(uintptr_t)linkid,
 942  950                      (mod_hash_val_t)ddp) == 0);
 943  951                  devnet_need_rebuild = B_TRUE;
 944  952                  stat_create = B_TRUE;
 945      -                mutex_enter(&ddp->dd_mutex);
 946      -                if (!ddp->dd_prop_loaded && (ddp->dd_prop_taskid == 0)) {
 947      -                        ddp->dd_prop_taskid = taskq_dispatch(system_taskq,
 948      -                            dls_devnet_prop_task, ddp, TQ_SLEEP);
 949      -                }
 950      -                mutex_exit(&ddp->dd_mutex);
 951  953          }
 952  954          err = 0;
 953  955  done:
 954  956          /*
 955  957           * It is safe to drop the i_dls_devnet_lock at this point. In the case
 956  958           * of physical devices, the softmac framework will fail the device
 957  959           * detach based on the smac_state or smac_hold_cnt. Other cases like
 958  960           * vnic and aggr use their own scheme to serialize creates and deletes
 959  961           * and ensure that *ddp is valid.
 960  962           */
 961  963          rw_exit(&i_dls_devnet_lock);
      964 +
      965 +        if (err == 0 && zoneid != GLOBAL_ZONEID) {
      966 +                /*
      967 +                 * If this link is being created directly within a non-global
      968 +                 * zone, then flag it as transient so that it will be cleaned
      969 +                 * up when the zone is shut down.
      970 +                 */
      971 +                err = i_dls_devnet_setzid(ddp, zoneid, B_FALSE, B_TRUE);
      972 +                if (err != 0) {
      973 +                        /*
      974 +                         * At this point the link is marked as
      975 +                         * DD_INITIALIZING -- there can be no
      976 +                         * outstanding temp refs and therefore no need
      977 +                         * to wait for them.
      978 +                         */
      979 +                        ASSERT(ddp->dd_flags & DD_INITIALIZING);
      980 +                        (void) dls_devnet_unset(mh, &linkid, B_FALSE);
      981 +                        return (err);
      982 +                }
      983 +        }
      984 +
 962  985          if (err == 0) {
 963  986                  if (zoneid != GLOBAL_ZONEID &&
 964  987                      (err = i_dls_devnet_setzid(ddp, zoneid, B_FALSE,
 965  988                      B_FALSE)) != 0) {
 966  989                          /*
 967  990                           * At this point the link is marked as
 968  991                           * DD_INITIALIZING -- there can be no
 969  992                           * outstanding temp refs and therefore no need
 970  993                           * to wait for them.
 971  994                           */
↓ open down ↓ 7 lines elided ↑ open up ↑
 979 1002                   * before calling the ks_update (dls_devnet_stat_update) entry
 980 1003                   * point which in turn grabs the i_dls_devnet_lock. So the
 981 1004                   * lock hierarchy is kstat locks -> i_dls_devnet_lock.
 982 1005                   */
 983 1006                  if (stat_create)
 984 1007                          dls_devnet_stat_create(ddp, zoneid, zoneid);
 985 1008                  if (ddpp != NULL)
 986 1009                          *ddpp = ddp;
 987 1010  
 988 1011                  mutex_enter(&ddp->dd_mutex);
 989      -                if (linkid != DATALINK_INVALID_LINKID && !ddp->dd_prop_loaded &&
 990      -                    ddp->dd_prop_taskid == TASKQID_INVALID) {
     1012 +                if (linkid != DATALINK_INVALID_LINKID &&
     1013 +                    !ddp->dd_prop_loaded && ddp->dd_prop_taskid == 0) {
 991 1014                          ddp->dd_prop_taskid = taskq_dispatch(system_taskq,
 992 1015                              dls_devnet_prop_task, ddp, TQ_SLEEP);
 993 1016                  }
 994 1017                  mutex_exit(&ddp->dd_mutex);
 995 1018  
 996 1019          }
 997 1020          return (err);
 998 1021  }
 999 1022  
1000 1023  /*
↓ open down ↓ 40 lines elided ↑ open up ↑
1041 1064                   * link assigned, in which case we want to clean it up instead
1042 1065                   * of moving it back to the global zone, or b) it's possible
1043 1066                   * that we're trying to clean up an orphaned vnic that was
1044 1067                   * delegated to a zone and which wasn't cleaned up properly
1045 1068                   * when the zone went away.  Check for either of these cases
1046 1069                   * before we simply return EBUSY.
1047 1070                   *
1048 1071                   * zstatus indicates which situation we are dealing with:
1049 1072                   *       0 - means return EBUSY
1050 1073                   *       1 - means case (a), cleanup transient link
1051      -                 *      -1 - means case (b), orphained VNIC
     1074 +                 *      -1 - means case (b), orphaned VNIC
1052 1075                   */
1053 1076                  if (ddp->dd_ref > 1 && ddp->dd_zid != GLOBAL_ZONEID) {
1054 1077                          zone_t  *zp;
1055 1078  
1056 1079                          if ((zp = zone_find_by_id(ddp->dd_zid)) == NULL) {
1057 1080                                  zstatus = -1;
1058 1081                          } else {
1059 1082                                  if (ddp->dd_transient) {
1060 1083                                          zone_status_t s = zone_status_get(zp);
1061 1084  
↓ open down ↓ 6 lines elided ↑ open up ↑
1068 1091  
1069 1092                  if (zstatus == 0) {
1070 1093                          mutex_exit(&ddp->dd_mutex);
1071 1094                          rw_exit(&i_dls_devnet_lock);
1072 1095                          return (EBUSY);
1073 1096                  }
1074 1097  
1075 1098                  /*
1076 1099                   * We want to delete the link, reset ref to 1.
1077 1100                   */
1078      -                if (zstatus == -1)
     1101 +                if (zstatus == -1) {
1079 1102                          /* Log a warning, but continue in this case */
1080 1103                          cmn_err(CE_WARN, "clear orphaned datalink: %s\n",
1081 1104                              ddp->dd_linkname);
     1105 +                }
1082 1106                  ddp->dd_ref = 1;
1083 1107          }
1084 1108  
1085 1109          ddp->dd_flags |= DD_CONDEMNED;
1086 1110          ddp->dd_ref--;
1087 1111          *id = ddp->dd_linkid;
1088 1112  
1089 1113          /*
1090 1114           * Remove this dls_devnet_t from the hash table.
1091 1115           */
↓ open down ↓ 12 lines elided ↑ open up ↑
1104 1128           * It is important to call i_dls_devnet_setzid() WITHOUT the
1105 1129           * i_dls_devnet_lock held. The setzid call grabs the MAC
1106 1130           * perim; thus causing DLS -> MAC lock ordering if performed
1107 1131           * with the i_dls_devnet_lock held. This forces consumers to
1108 1132           * grab the MAC perim before calling dls_devnet_unset() (the
1109 1133           * locking rules state MAC -> DLS order). By performing the
1110 1134           * setzid outside of the i_dls_devnet_lock consumers can
1111 1135           * safely call dls_devnet_unset() outside the MAC perim.
1112 1136           */
1113 1137          if (ddp->dd_zid != GLOBAL_ZONEID) {
     1138 +                /*
     1139 +                 * We need to release the dd_mutex before we try and destroy the
     1140 +                 * stat. When we destroy it, we'll need to grab the lock for the
     1141 +                 * kstat but if there's a concurrent reader of the kstat, we'll
     1142 +                 * be blocked on it. This will lead to deadlock because these
     1143 +                 * kstats employ a ks_update function (dls_devnet_stat_update)
     1144 +                 * which needs the dd_mutex that we currently hold.
     1145 +                 *
     1146 +                 * Because we've already flagged the dls_devnet_t as
     1147 +                 * DD_CONDEMNED and we still have a write lock on
     1148 +                 * i_dls_devnet_lock, we should be able to release the dd_mutex.
     1149 +                 */
     1150 +                mutex_exit(&ddp->dd_mutex);
1114 1151                  dls_devnet_stat_destroy(ddp, ddp->dd_zid);
     1152 +                mutex_enter(&ddp->dd_mutex);
1115 1153                  (void) i_dls_devnet_setzid(ddp, GLOBAL_ZONEID, B_FALSE,
1116 1154                      B_FALSE);
1117 1155          }
1118 1156  
1119 1157          if (wait) {
1120 1158                  /*
1121 1159                   * Wait until all temporary references are released.
1122 1160                   * The holders of the tref need the MAC perim to
1123 1161                   * perform their work and release the tref. To avoid
1124 1162                   * deadlock, assert that the perim is never held here.
1125 1163                   */
1126 1164                  ASSERT0(MAC_PERIM_HELD(mh));
1127 1165                  while ((ddp->dd_tref != 0) || (ddp->dd_prop_taskid != 0))
1128 1166                          cv_wait(&ddp->dd_cv, &ddp->dd_mutex);
1129 1167          } else {
1130 1168                  VERIFY(ddp->dd_tref == 0);
1131      -                VERIFY(ddp->dd_prop_taskid == (taskqid_t)NULL);
     1169 +                VERIFY(ddp->dd_prop_taskid == 0);
1132 1170          }
1133 1171  
1134 1172          if (ddp->dd_linkid != DATALINK_INVALID_LINKID) {
1135 1173                  dls_devnet_stat_destroy(ddp, ddp->dd_owner_zid);
1136 1174          }
1137 1175  
1138 1176          ddp->dd_prop_loaded = B_FALSE;
1139 1177          ddp->dd_linkid = DATALINK_INVALID_LINKID;
1140 1178          ddp->dd_flags = 0;
1141 1179          mutex_exit(&ddp->dd_mutex);
↓ open down ↓ 14 lines elided ↑ open up ↑
1156 1194  
1157 1195          rw_enter(&i_dls_devnet_lock, RW_WRITER);
1158 1196          if ((err = mod_hash_find(i_dls_devnet_hash,
1159 1197              (mod_hash_key_t)dlp->dl_name, (mod_hash_val_t *)&ddp)) != 0) {
1160 1198                  ASSERT(err == MH_ERR_NOTFOUND);
1161 1199                  rw_exit(&i_dls_devnet_lock);
1162 1200                  return (ENOENT);
1163 1201          }
1164 1202  
1165 1203          mutex_enter(&ddp->dd_mutex);
1166      -        ASSERT(ddp->dd_ref > 0);
1167      -        if (ddp->dd_flags & DD_CONDEMNED) {
     1204 +        VERIFY(ddp->dd_ref > 0);
     1205 +        if (DD_NOT_VISIBLE(ddp->dd_flags)) {
1168 1206                  mutex_exit(&ddp->dd_mutex);
1169 1207                  rw_exit(&i_dls_devnet_lock);
1170 1208                  return (ENOENT);
1171 1209          }
1172 1210          ddp->dd_tref++;
1173 1211          mutex_exit(&ddp->dd_mutex);
1174 1212          rw_exit(&i_dls_devnet_lock);
1175 1213  
1176 1214          *ddhp = ddp;
1177 1215          return (0);
↓ open down ↓ 750 lines elided ↑ open up ↑
1928 1966           *
1929 1967           * We could prevent the ENOTEMPTY from dls_link_rele_by_name()
1930 1968           * by calling mac_disable() before calling
1931 1969           * dls_devnet_destroy() but that's not currently possible due
1932 1970           * to a long-standing bug. OpenSolaris 6791335: The semantics
1933 1971           * of mac_disable() were modified by Crossbow such that
1934 1972           * dls_devnet_destroy() needs to be called before
1935 1973           * mac_disable() can succeed. This is because of the implicit
1936 1974           * reference that dls has on the mac_impl_t.
1937 1975           */
1938      -        if (err != 0 && err != ENOENT) {
     1976 +        if (err != 0 && err != ENOENT)
1939 1977                  return (err);
1940      -        }
1941 1978  
1942 1979          mac_perim_enter_by_mh(mh, &mph);
1943 1980          err = dls_link_rele_by_name(mac_name(mh));
1944 1981          if (err != 0) {
1945 1982                  dls_devnet_t    *ddp;
1946 1983  
1947 1984                  /*
1948 1985                   * XXX It is a general GLDv3 bug that dls_devnet_set() has to
1949 1986                   * be called to re-set the link when destroy fails.  The
1950 1987                   * zoneid below will be incorrect if this function is ever
↓ open down ↓ 108 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX