Print this page
Warning about DCE lifetimes
copyright fix
gdamore's feedback
7185 IP DCEs leak from halted non-global zones


   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2012, Joyent, Inc. All rights reserved.

  25  */
  26 
  27 #include <sys/types.h>
  28 #include <sys/stream.h>
  29 #include <sys/strsun.h>
  30 #include <sys/zone.h>
  31 #include <sys/ddi.h>
  32 #include <sys/disp.h>
  33 #include <sys/sunddi.h>
  34 #include <sys/cmn_err.h>
  35 #include <sys/debug.h>
  36 #include <sys/atomic.h>
  37 #include <sys/callb.h>
  38 #define _SUN_TPI_VERSION 2
  39 #include <sys/tihdr.h>
  40 
  41 #include <inet/common.h>
  42 #include <inet/mi.h>
  43 #include <inet/mib2.h>
  44 #include <inet/snmpcom.h>


  73  * as long as the entity caching the dce_t also caches the dce_generation,
  74  * and compares the cached generation to detect any changes.
  75  * Furthermore, when a DCE is deleted, if there are any outstanding references
  76  * to the DCE it will be marked as condemned. The condemned mark is
  77  * a designated generation number which is never otherwise used, hence
  78  * the single comparison with the generation number captures that as well.
  79  *
  80  * An example of code which caches is as follows:
  81  *
  82  *      if (mystruct->my_dce_generation != mystruct->my_dce->dce_generation) {
  83  *              The DCE has changed
  84  *              mystruct->my_dce = dce_lookup_pkt(mp, ixa,
  85  *                  &mystruct->my_dce_generation);
  86  *              Not needed in practice, since we have the default DCE:
  87  *              if (DCE_IS_CONDEMNED(mystruct->my_dce))
  88  *                      return failure;
  89  *      }
  90  *
  91  * Note that for IPv6 link-local addresses we record the ifindex since the
  92  * link-locals are not globally unique.




  93  */
  94 
  95 /*
  96  * Hash bucket structure for DCEs
  97  */
  98 typedef struct dcb_s {
       /* Held as RW_WRITER by dce_cleanup() while it walks and unlinks. */
  99         krwlock_t       dcb_lock;
       /* Presumably the number of DCEs on this bucket's chain - TODO confirm. */
 100         uint32_t        dcb_cnt;
       /* Head of the bucket's chain; entries are linked through dce_next. */
 101         dce_t           *dcb_dce;
 102 } dcb_t;
 103 
     /*
      * Forward declarations. In the caller visible here (dce_cleanup()),
      * dce_delete_locked() is invoked with the bucket's dcb_lock held as
      * writer.
      */
 104 static void     dce_delete_locked(dcb_t *, dce_t *);
 105 static void     dce_make_condemned(dce_t *);
 106 
     /* kmem cache that dce_stack_destroy() frees the per-stack default DCE to. */
 107 static kmem_cache_t *dce_cache;
     /*
      * NOTE(review): the reclaim state below is not used in this excerpt;
      * presumably a thread handle, the lock/cv pair it sleeps on, and a
      * shutdown flag - confirm against the reclaim code.
      */
 108 static kthread_t *dce_reclaim_thread;
 109 static kmutex_t dce_reclaim_lock;
 110 static kcondvar_t dce_reclaim_cv;
 111 static int dce_reclaim_shutdown;
 112 


 309         ipst->ips_dce_default->dce_generation = DCE_GENERATION_INITIAL;
 310         ipst->ips_dce_default->dce_last_change_time =
 311             TICK_TO_SEC(ddi_get_lbolt64());
 312         ipst->ips_dce_default->dce_refcnt = 1;    /* Should never go away */
 313         ipst->ips_dce_default->dce_ipst = ipst;
 314 
 315         /* This must be a power of two since we are using IRE_ADDR_HASH macro */
 316         ipst->ips_dce_hashsize = ip_dce_hash_size;
 317         ipst->ips_dce_hash_v4 = kmem_zalloc(ipst->ips_dce_hashsize *
 318             sizeof (dcb_t), KM_SLEEP);
 319         ipst->ips_dce_hash_v6 = kmem_zalloc(ipst->ips_dce_hashsize *
 320             sizeof (dcb_t), KM_SLEEP);
 321         for (i = 0; i < ipst->ips_dce_hashsize; i++) {
 322                 rw_init(&ipst->ips_dce_hash_v4[i].dcb_lock, NULL, RW_DEFAULT,
 323                     NULL);
 324                 rw_init(&ipst->ips_dce_hash_v6[i].dcb_lock, NULL, RW_DEFAULT,
 325                     NULL);
 326         }
 327 }
 328 























 329 void
 330 dce_stack_destroy(ip_stack_t *ipst)
 331 {
             /*
              * Tear down the per-stack DCE state: destroy every bucket lock,
              * free both hash arrays, then free the default DCE.
              *
              * NOTE(review): nothing in this function walks dcb_dce, so any
              * DCEs still linked into a bucket are not released here. That
              * looks like the leak named in the change description (7185) -
              * confirm.
              */
 332         int i;
 333         for (i = 0; i < ipst->ips_dce_hashsize; i++) {

 334                 rw_destroy(&ipst->ips_dce_hash_v4[i].dcb_lock);

 335                 rw_destroy(&ipst->ips_dce_hash_v6[i].dcb_lock);
 336         }
             /* Each array holds ips_dce_hashsize dcb_t buckets. */
 337         kmem_free(ipst->ips_dce_hash_v4,
 338             ipst->ips_dce_hashsize * sizeof (dcb_t));
 339         ipst->ips_dce_hash_v4 = NULL;
 340         kmem_free(ipst->ips_dce_hash_v6,
 341             ipst->ips_dce_hashsize * sizeof (dcb_t));
 342         ipst->ips_dce_hash_v6 = NULL;
 343         ipst->ips_dce_hashsize = 0;
 344 
             /*
              * The default DCE is created with dce_refcnt set to 1, so any
              * other value here means some holder has not let go of it.
              */
 345         ASSERT(ipst->ips_dce_default->dce_refcnt == 1);
 346         kmem_cache_free(dce_cache, ipst->ips_dce_default);
 347         ipst->ips_dce_default = NULL;
 348 }
 349 
 350 /* When any DCE is good enough */
 351 dce_t *
 352 dce_get_default(ip_stack_t *ipst)
 353 {
 354         dce_t           *dce;


 938                 }
 939                 rw_exit(&dcb->dcb_lock);
 940         }
 941         optp->len = (t_uscalar_t)msgdsize(mpctl->b_cont);
 942         ip3dbg(("ip_snmp_get: level %d, name %d, len %d\n",
 943             (int)optp->level, (int)optp->name, (int)optp->len));
 944         qreply(q, mpctl);
 945 
 946         return (mp2ctl);
 947 }
 948 
 949 /*
 950  * Remove IPv6 DCEs which refer to an ifindex that is going away.
 951  * This is not required for correctness, but it avoids netstat -d
 952  * showing stale stuff that will never be used.
 953  */
 954 void
 955 dce_cleanup(uint_t ifindex, ip_stack_t *ipst)
 956 {
 957         uint_t  i;
 958         dcb_t   *dcb;
 959         dce_t   *dce, *nextdce;
 960 
             /* Only the IPv6 hash is walked; the IPv4 hash is left alone. */
 961         for (i = 0; i < ipst->ips_dce_hashsize; i++) {
 962                 dcb = &ipst->ips_dce_hash_v6[i];
                     /* Writer lock: entries may be unlinked from the chain. */
 963                 rw_enter(&dcb->dcb_lock, RW_WRITER);
 964 
 965                 for (dce = dcb->dcb_dce; dce != NULL; dce = nextdce) {
                             /* Fetch the successor before dce may be deleted. */
 966                         nextdce = dce->dce_next;
 967                         if (dce->dce_ifindex == ifindex) {
 968                                 dce_delete_locked(dcb, dce);
                                     /*
                                      * Presumably drops the reference held on
                                      * behalf of the hash table - confirm
                                      * against dce_delete_locked().
                                      */
 969                                 dce_refrele(dce);
 970                         }
 971                 }
 972                 rw_exit(&dcb->dcb_lock);
 973         }
 974 }


   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  25  * Copyright 2017, OmniTI Computer Consulting, Inc. All rights reserved.
  26  */
  27 
  28 #include <sys/types.h>
  29 #include <sys/stream.h>
  30 #include <sys/strsun.h>
  31 #include <sys/zone.h>
  32 #include <sys/ddi.h>
  33 #include <sys/disp.h>
  34 #include <sys/sunddi.h>
  35 #include <sys/cmn_err.h>
  36 #include <sys/debug.h>
  37 #include <sys/atomic.h>
  38 #include <sys/callb.h>
  39 #define _SUN_TPI_VERSION 2
  40 #include <sys/tihdr.h>
  41 
  42 #include <inet/common.h>
  43 #include <inet/mi.h>
  44 #include <inet/mib2.h>
  45 #include <inet/snmpcom.h>


  74  * as long as the entity caching the dce_t also caches the dce_generation,
  75  * and compares the cached generation to detect any changes.
  76  * Furthermore, when a DCE is deleted, if there are any outstanding references
  77  * to the DCE it will be marked as condemned. The condemned mark is
  78  * a designated generation number which is never otherwise used, hence
  79  * the single comparison with the generation number captures that as well.
  80  *
  81  * An example of code which caches is as follows:
  82  *
  83  *      if (mystruct->my_dce_generation != mystruct->my_dce->dce_generation) {
  84  *              The DCE has changed
  85  *              mystruct->my_dce = dce_lookup_pkt(mp, ixa,
  86  *                  &mystruct->my_dce_generation);
  87  *              Not needed in practice, since we have the default DCE:
  88  *              if (DCE_IS_CONDEMNED(mystruct->my_dce))
  89  *                      return failure;
  90  *      }
  91  *
  92  * Note that for IPv6 link-local addresses we record the ifindex since the
  93  * link-locals are not globally unique.
  94  *
  95  * DCEs can remain for an arbitrarily long time, until memory pressure or
  96  * too-deep hash buckets (see dce_lookup_and_add*()) enable the reclaim thread
  97  * to actually remove DCEs from the cache.
  98  */
  99 
 100 /*
 101  * Hash bucket structure for DCEs
 102  */
 103 typedef struct dcb_s {
       /* Held as RW_WRITER by dce_bucket_clean() while it walks and unlinks. */
 104         krwlock_t       dcb_lock;
       /* Presumably the number of DCEs on this bucket's chain - TODO confirm. */
 105         uint32_t        dcb_cnt;
       /* Head of the bucket's chain; entries are linked through dce_next. */
 106         dce_t           *dcb_dce;
 107 } dcb_t;
 108 
     /*
      * Forward declarations. In the caller visible here (dce_bucket_clean()),
      * dce_delete_locked() is invoked with the bucket's dcb_lock held as
      * writer.
      */
 109 static void     dce_delete_locked(dcb_t *, dce_t *);
 110 static void     dce_make_condemned(dce_t *);
 111 
     /* kmem cache that dce_stack_destroy() frees the per-stack default DCE to. */
 112 static kmem_cache_t *dce_cache;
     /*
      * NOTE(review): the reclaim state below is not used in this excerpt;
      * presumably a thread handle, the lock/cv pair it sleeps on, and a
      * shutdown flag - confirm against the reclaim code.
      */
 113 static kthread_t *dce_reclaim_thread;
 114 static kmutex_t dce_reclaim_lock;
 115 static kcondvar_t dce_reclaim_cv;
 116 static int dce_reclaim_shutdown;
 117 


 314         ipst->ips_dce_default->dce_generation = DCE_GENERATION_INITIAL;
 315         ipst->ips_dce_default->dce_last_change_time =
 316             TICK_TO_SEC(ddi_get_lbolt64());
 317         ipst->ips_dce_default->dce_refcnt = 1;    /* Should never go away */
 318         ipst->ips_dce_default->dce_ipst = ipst;
 319 
 320         /* This must be a power of two since we are using IRE_ADDR_HASH macro */
 321         ipst->ips_dce_hashsize = ip_dce_hash_size;
 322         ipst->ips_dce_hash_v4 = kmem_zalloc(ipst->ips_dce_hashsize *
 323             sizeof (dcb_t), KM_SLEEP);
 324         ipst->ips_dce_hash_v6 = kmem_zalloc(ipst->ips_dce_hashsize *
 325             sizeof (dcb_t), KM_SLEEP);
 326         for (i = 0; i < ipst->ips_dce_hashsize; i++) {
 327                 rw_init(&ipst->ips_dce_hash_v4[i].dcb_lock, NULL, RW_DEFAULT,
 328                     NULL);
 329                 rw_init(&ipst->ips_dce_hash_v6[i].dcb_lock, NULL, RW_DEFAULT,
 330                     NULL);
 331         }
 332 }
 333 
 334 /*
 335  * Given a DCE hash bucket, unlink DCE entries from it. Some callers need
 336  * ifindex-specific matching, others don't. Don't overload ifindex to indicate
 337  * specificity, just indicate so explicitly.
 338  */
 339 static void
 340 dce_bucket_clean(dcb_t *dcb, boolean_t specific_ifindex, uint_t ifindex)
 341 {
             /*
              * dcb:              bucket to clean; its dcb_lock is taken as
              *                   writer for the whole walk.
              * specific_ifindex: B_FALSE removes every entry in the bucket;
              *                   otherwise only entries whose dce_ifindex
              *                   equals ifindex are removed.
              * ifindex:          not examined when specific_ifindex is
              *                   B_FALSE.
              */
 342         dce_t   *dce, *nextdce;
 343 
 344         rw_enter(&dcb->dcb_lock, RW_WRITER);
 345 
 346         for (dce = dcb->dcb_dce; dce != NULL; dce = nextdce) {
                     /* Fetch the successor before dce may be deleted. */
 347                 nextdce = dce->dce_next;
 348                 if ((!specific_ifindex) || dce->dce_ifindex == ifindex) {
 349                         dce_delete_locked(dcb, dce);
                             /*
                              * Presumably drops the reference held on behalf
                              * of the hash table - confirm against
                              * dce_delete_locked().
                              */
 350                         dce_refrele(dce);
 351                 }
 352         }
 353 
 354         rw_exit(&dcb->dcb_lock);
 355 }
 356 
 357 void
 358 dce_stack_destroy(ip_stack_t *ipst)
 359 {
             /*
              * Tear down the per-stack DCE state: empty and destroy every
              * bucket, free both hash arrays, then free the default DCE.
              */
 360         int i;
 361         for (i = 0; i < ipst->ips_dce_hashsize; i++) {
                     /*
                      * B_FALSE means "no ifindex filter", so every entry is
                      * unlinked. The clean must precede rw_destroy() because
                      * dce_bucket_clean() takes the bucket lock itself.
                      */
 362                 dce_bucket_clean(&ipst->ips_dce_hash_v4[i], B_FALSE, 0);
 363                 rw_destroy(&ipst->ips_dce_hash_v4[i].dcb_lock);
 364                 dce_bucket_clean(&ipst->ips_dce_hash_v6[i], B_FALSE, 0);
 365                 rw_destroy(&ipst->ips_dce_hash_v6[i].dcb_lock);
 366         }
             /* Each array holds ips_dce_hashsize dcb_t buckets. */
 367         kmem_free(ipst->ips_dce_hash_v4,
 368             ipst->ips_dce_hashsize * sizeof (dcb_t));
 369         ipst->ips_dce_hash_v4 = NULL;
 370         kmem_free(ipst->ips_dce_hash_v6,
 371             ipst->ips_dce_hashsize * sizeof (dcb_t));
 372         ipst->ips_dce_hash_v6 = NULL;
 373         ipst->ips_dce_hashsize = 0;
 374 
             /*
              * The default DCE is created with dce_refcnt set to 1, so any
              * other value here means some holder has not let go of it.
              */
 375         ASSERT(ipst->ips_dce_default->dce_refcnt == 1);
 376         kmem_cache_free(dce_cache, ipst->ips_dce_default);
 377         ipst->ips_dce_default = NULL;
 378 }
 379 
 380 /* When any DCE is good enough */
 381 dce_t *
 382 dce_get_default(ip_stack_t *ipst)
 383 {
 384         dce_t           *dce;


 968                 }
 969                 rw_exit(&dcb->dcb_lock);
 970         }
 971         optp->len = (t_uscalar_t)msgdsize(mpctl->b_cont);
 972         ip3dbg(("ip_snmp_get: level %d, name %d, len %d\n",
 973             (int)optp->level, (int)optp->name, (int)optp->len));
 974         qreply(q, mpctl);
 975 
 976         return (mp2ctl);
 977 }
 978 
 979 /*
 980  * Remove IPv6 DCEs which refer to an ifindex that is going away.
 981  * This is not required for correctness, but it avoids netstat -d
 982  * showing stale stuff that will never be used.
 983  */
 984 void
 985 dce_cleanup(uint_t ifindex, ip_stack_t *ipst)
 986 {
 987         uint_t  i;


 988 
             /*
              * Only the IPv6 hash is walked. B_TRUE restricts removal to
              * entries whose dce_ifindex matches ifindex; locking is done
              * inside dce_bucket_clean().
              */
 989         for (i = 0; i < ipst->ips_dce_hashsize; i++)
 990                 dce_bucket_clean(&ipst->ips_dce_hash_v6[i], B_TRUE, ifindex);











 991 }