Print this page
Warning about DCE lifetimes
copyright fix
gdamore's feedback
7185 IP DCEs leak from halted non-global zones

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/inet/ip/ip_dce.c
          +++ new/usr/src/uts/common/inet/ip/ip_dce.c
↓ open down ↓ 14 lines elided ↑ open up ↑
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   * Copyright (c) 2012, Joyent, Inc. All rights reserved.
       25 + * Copyright 2017, OmniTI Computer Consulting, Inc. All rights reserved.
  25   26   */
  26   27  
  27   28  #include <sys/types.h>
  28   29  #include <sys/stream.h>
  29   30  #include <sys/strsun.h>
  30   31  #include <sys/zone.h>
  31   32  #include <sys/ddi.h>
  32   33  #include <sys/disp.h>
  33   34  #include <sys/sunddi.h>
  34   35  #include <sys/cmn_err.h>
↓ open down ↓ 48 lines elided ↑ open up ↑
  83   84   *              The DCE has changed
  84   85   *              mystruct->my_dce = dce_lookup_pkt(mp, ixa,
  85   86   *                  &mystruct->my_dce_generation);
  86   87   *              Not needed in practice, since we have the default DCE:
  87   88   *              if (DCE_IS_CONDEMNED(mystruct->my_dce))
  88   89   *                      return failure;
  89   90   *      }
  90   91   *
  91   92   * Note that for IPv6 link-local addresses we record the ifindex since the
  92   93   * link-locals are not globally unique.
       94 + *
       95 + * DCEs can remain for an arbitrarily long time, until memory pressure or
       96 + * too-deep hash buckets (see dce_lookup_and_add*()) enable the reclaim thread
       97 + * to actually remove DCEs from the cache.
  93   98   */
  94   99  
  95  100  /*
  96  101   * Hash bucket structure for DCEs
  97  102   */
  98  103  typedef struct dcb_s {
  99  104          krwlock_t       dcb_lock;       /* reader/writer lock for this bucket; held as writer while entries are unlinked */
 100  105          uint32_t        dcb_cnt;        /* presumably the number of DCEs in this bucket; confirm */
 101  106          dce_t           *dcb_dce;       /* first DCE in the bucket; entries are chained through dce_next */
 102  107  } dcb_t;
↓ open down ↓ 216 lines elided ↑ open up ↑
 319  324          ipst->ips_dce_hash_v6 = kmem_zalloc(ipst->ips_dce_hashsize *
 320  325              sizeof (dcb_t), KM_SLEEP);
 321  326          for (i = 0; i < ipst->ips_dce_hashsize; i++) {
 322  327                  rw_init(&ipst->ips_dce_hash_v4[i].dcb_lock, NULL, RW_DEFAULT,
 323  328                      NULL);
 324  329                  rw_init(&ipst->ips_dce_hash_v6[i].dcb_lock, NULL, RW_DEFAULT,
 325  330                      NULL);
 326  331          }
 327  332  }
 328  333  
      334 +/*
      335 + * Given a DCE hash bucket, unlink DCE entries from it. Some callers need
      336 + * ifindex-specific matching, others don't. Don't overload ifindex to indicate
      337 + * specificity, just indicate so explicitly.
      338 + */
      339 +static void
      340 +dce_bucket_clean(dcb_t *dcb, boolean_t specific_ifindex, uint_t ifindex)
      341 +{
      342 +        dce_t   *dce, *nextdce;
      343 +
      344 +        rw_enter(&dcb->dcb_lock, RW_WRITER);    /* held as writer for the whole walk */
      345 +
      346 +        for (dce = dcb->dcb_dce; dce != NULL; dce = nextdce) {
      347 +                nextdce = dce->dce_next;        /* saved first: dce may be unlinked and released below */
      348 +                if ((!specific_ifindex) || dce->dce_ifindex == ifindex) {       /* every entry, or only those matching ifindex */
      349 +                        dce_delete_locked(dcb, dce);
      350 +                        dce_refrele(dce);       /* presumably drops the hold the cache had on dce; confirm */
      351 +                }
      352 +        }
      353 +
      354 +        rw_exit(&dcb->dcb_lock);
      355 +}
      356 +
 329  357  void
 330  358  dce_stack_destroy(ip_stack_t *ipst)
 331  359  {
 332  360          int i;
 333  361          for (i = 0; i < ipst->ips_dce_hashsize; i++) {
      362 +                dce_bucket_clean(&ipst->ips_dce_hash_v4[i], B_FALSE, 0);
 334  363                  rw_destroy(&ipst->ips_dce_hash_v4[i].dcb_lock);
      364 +                dce_bucket_clean(&ipst->ips_dce_hash_v6[i], B_FALSE, 0);
 335  365                  rw_destroy(&ipst->ips_dce_hash_v6[i].dcb_lock);
 336  366          }
 337  367          kmem_free(ipst->ips_dce_hash_v4,
 338  368              ipst->ips_dce_hashsize * sizeof (dcb_t));
 339  369          ipst->ips_dce_hash_v4 = NULL;
 340  370          kmem_free(ipst->ips_dce_hash_v6,
 341  371              ipst->ips_dce_hashsize * sizeof (dcb_t));
 342  372          ipst->ips_dce_hash_v6 = NULL;
 343  373          ipst->ips_dce_hashsize = 0;
 344  374  
↓ open down ↓ 603 lines elided ↑ open up ↑
 948  978  
 949  979  /*
 950  980   * Remove IPv6 DCEs which refer to an ifindex that is going away.
 951  981   * This is not required for correctness, but it avoids netstat -d
 952  982   * showing stale stuff that will never be used.
 953  983   */
 954  984  void
 955  985  dce_cleanup(uint_t ifindex, ip_stack_t *ipst)
 956  986  {
 957  987          uint_t  i;
 958      -        dcb_t   *dcb;
 959      -        dce_t   *dce, *nextdce;
 960  988  
 961      -        for (i = 0; i < ipst->ips_dce_hashsize; i++) {
 962      -                dcb = &ipst->ips_dce_hash_v6[i];
 963      -                rw_enter(&dcb->dcb_lock, RW_WRITER);
 964      -
 965      -                for (dce = dcb->dcb_dce; dce != NULL; dce = nextdce) {
 966      -                        nextdce = dce->dce_next;
 967      -                        if (dce->dce_ifindex == ifindex) {
 968      -                                dce_delete_locked(dcb, dce);
 969      -                                dce_refrele(dce);
 970      -                        }
 971      -                }
 972      -                rw_exit(&dcb->dcb_lock);
 973      -        }
      989 +        for (i = 0; i < ipst->ips_dce_hashsize; i++)
      990 +                dce_bucket_clean(&ipst->ips_dce_hash_v6[i], B_TRUE, ifindex);   /* IPv6 buckets only; B_TRUE limits removal to DCEs whose dce_ifindex equals ifindex */
 974  991  }
    
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX