Print this page
    
Fix NFS design problems re. multiple zone keys
Make NFS server zone-specific data all have the same lifetime
Fix rfs4_clean_state_exi
Fix exi_cache_reclaim
Fix mistakes in zone keys work
More fixes re. exi_zoneid and exi_tree
(danmcd -> Keep some ASSERT()s around for readability.)
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/fs/nfs/nfs4_db.c
          +++ new/usr/src/uts/common/fs/nfs/nfs4_db.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   */
  25   25  
  26   26  /*
  27   27   * Copyright 2019 Nexenta Systems, Inc.
  28   28   */
  29   29  
  30   30  #include <sys/systm.h>
  31   31  #include <sys/cmn_err.h>
  32   32  #include <sys/kmem.h>
  33   33  #include <sys/disp.h>
  34   34  #include <sys/id_space.h>
  35   35  #include <rpc/rpc.h>
  36   36  #include <nfs/nfs4.h>
  37   37  #include <nfs/nfs4_db_impl.h>
  38   38  #include <sys/sdt.h>
  39   39  
  40   40  static int rfs4_reap_interval = RFS4_REAP_INTERVAL;
  41   41  
  42   42  static void rfs4_dbe_reap(rfs4_table_t *, time_t, uint32_t);
  43   43  static void rfs4_dbe_destroy(rfs4_dbe_t *);
  44   44  static rfs4_dbe_t *rfs4_dbe_create(rfs4_table_t *, id_t, rfs4_entry_t);
  45   45  static void rfs4_start_reaper(rfs4_table_t *);
  46   46  
  47   47  /*
  48   48   * t_lowat - integer percentage of table entries        /etc/system only
  49   49   * t_hiwat - integer percentage of table entries        /etc/system only
  50   50   * t_lreap - integer percentage of table reap time      mdb or /etc/system
  51   51   * t_hreap - integer percentage of table reap time      mdb or /etc/system
  52   52   */
  53   53  uint32_t        t_lowat = 50;   /* reap at t_lreap when id's in use hit 50% */
  54   54  uint32_t        t_hiwat = 75;   /* reap at t_hreap when id's in use hit 75% */
  55   55  time_t          t_lreap = 50;   /* default to 50% of table's reap interval */
  56   56  time_t          t_hreap = 10;   /* default to 10% of table's reap interval */
  57   57  
  58   58  id_t
  59   59  rfs4_dbe_getid(rfs4_dbe_t *entry)
  60   60  {
  61   61          return (entry->dbe_id);
  62   62  }
  63   63  
  64   64  void
  65   65  rfs4_dbe_hold(rfs4_dbe_t *entry)
  66   66  {
  67   67          if (!MUTEX_HELD(entry->dbe_lock)) {
  68   68                  mutex_enter(entry->dbe_lock);
  69   69                  entry->dbe_refcnt++;
  70   70                  mutex_exit(entry->dbe_lock);
  71   71          } else {
  72   72                  entry->dbe_refcnt++;
  73   73          }
  74   74  }
  75   75  
  76   76  /*
  77   77   * rfs4_dbe_rele_nolock only decrements the reference count of the entry.
  78   78   */
  79   79  void
  80   80  rfs4_dbe_rele_nolock(rfs4_dbe_t *entry)
  81   81  {
  82   82          if (!MUTEX_HELD(entry->dbe_lock)) {
  83   83                  ASSERT(entry->dbe_refcnt > 0);
  84   84                  mutex_enter(entry->dbe_lock);
  85   85                  entry->dbe_refcnt--;
  86   86                  mutex_exit(entry->dbe_lock);
  87   87          } else {
  88   88                  entry->dbe_refcnt--;
  89   89          }
  90   90  }
  91   91  
  92   92  
  93   93  uint32_t
  94   94  rfs4_dbe_refcnt(rfs4_dbe_t *entry)
  95   95  {
  96   96          return (entry->dbe_refcnt);
  97   97  }
  98   98  
  99   99  /*
 100  100   * Mark an entry such that the dbsearch will skip it.
  101  101   * Caller does not want this entry to be found any longer.
 102  102   */
 103  103  void
 104  104  rfs4_dbe_invalidate(rfs4_dbe_t *entry)
 105  105  {
 106  106          if (!MUTEX_HELD(entry->dbe_lock)) {
 107  107                  mutex_enter(entry->dbe_lock);
 108  108                  entry->dbe_invalid = TRUE;
 109  109                  entry->dbe_skipsearch = TRUE;
 110  110                  mutex_exit(entry->dbe_lock);
 111  111          } else {
 112  112                  entry->dbe_invalid = TRUE;
 113  113                  entry->dbe_skipsearch = TRUE;
 114  114          }
 115  115  }
 116  116  
 117  117  /*
 118  118   * Is this entry invalid?
 119  119   */
 120  120  bool_t
 121  121  rfs4_dbe_is_invalid(rfs4_dbe_t *entry)
 122  122  {
 123  123          return (entry->dbe_invalid);
 124  124  }
 125  125  
 126  126  time_t
 127  127  rfs4_dbe_get_timerele(rfs4_dbe_t *entry)
 128  128  {
 129  129          return (entry->dbe_time_rele);
 130  130  }
 131  131  
 132  132  /*
 133  133   * Use these to temporarily hide/unhide a db entry.
 134  134   */
 135  135  void
 136  136  rfs4_dbe_hide(rfs4_dbe_t *entry)
 137  137  {
 138  138          rfs4_dbe_lock(entry);
 139  139          entry->dbe_skipsearch = TRUE;
 140  140          rfs4_dbe_unlock(entry);
 141  141  }
 142  142  
 143  143  void
 144  144  rfs4_dbe_unhide(rfs4_dbe_t *entry)
 145  145  {
 146  146          rfs4_dbe_lock(entry);
 147  147          entry->dbe_skipsearch = FALSE;
 148  148          rfs4_dbe_unlock(entry);
 149  149  }
 150  150  
 151  151  void
 152  152  rfs4_dbe_rele(rfs4_dbe_t *entry)
 153  153  {
 154  154          mutex_enter(entry->dbe_lock);
 155  155          ASSERT(entry->dbe_refcnt > 1);
 156  156          entry->dbe_refcnt--;
 157  157          entry->dbe_time_rele = gethrestime_sec();
 158  158          mutex_exit(entry->dbe_lock);
 159  159  }
 160  160  
 161  161  void
 162  162  rfs4_dbe_lock(rfs4_dbe_t *entry)
 163  163  {
 164  164          mutex_enter(entry->dbe_lock);
 165  165  }
 166  166  
 167  167  void
 168  168  rfs4_dbe_unlock(rfs4_dbe_t *entry)
 169  169  {
 170  170          mutex_exit(entry->dbe_lock);
 171  171  }
 172  172  
 173  173  bool_t
 174  174  rfs4_dbe_islocked(rfs4_dbe_t *entry)
 175  175  {
 176  176          return (mutex_owned(entry->dbe_lock));
 177  177  }
 178  178  
 179  179  clock_t
 180  180  rfs4_dbe_twait(rfs4_dbe_t *entry, clock_t timeout)
 181  181  {
 182  182          return (cv_timedwait(entry->dbe_cv, entry->dbe_lock, timeout));
 183  183  }
 184  184  
 185  185  void
 186  186  rfs4_dbe_cv_broadcast(rfs4_dbe_t *entry)
 187  187  {
 188  188          cv_broadcast(entry->dbe_cv);
 189  189  }
 190  190  
 191  191  /* ARGSUSED */
 192  192  static int
 193  193  rfs4_dbe_kmem_constructor(void *obj, void *private, int kmflag)
 194  194  {
 195  195          rfs4_dbe_t *entry = obj;
 196  196  
 197  197          mutex_init(entry->dbe_lock, NULL, MUTEX_DEFAULT, NULL);
 198  198          cv_init(entry->dbe_cv, NULL, CV_DEFAULT, NULL);
 199  199  
 200  200          return (0);
 201  201  }
 202  202  
 203  203  static void
 204  204  rfs4_dbe_kmem_destructor(void *obj, void *private)
 205  205  {
 206  206          rfs4_dbe_t *entry = obj;
 207  207          /*LINTED*/
 208  208          rfs4_table_t *table = private;
 209  209  
 210  210          mutex_destroy(entry->dbe_lock);
 211  211          cv_destroy(entry->dbe_cv);
 212  212  }
 213  213  
 214  214  rfs4_database_t *
 215  215  rfs4_database_create(uint32_t flags)
 216  216  {
 217  217          rfs4_database_t *db;
 218  218  
 219  219          db = kmem_alloc(sizeof (rfs4_database_t), KM_SLEEP);
 220  220          mutex_init(db->db_lock, NULL, MUTEX_DEFAULT, NULL);
 221  221          db->db_tables = NULL;
 222  222          db->db_debug_flags = flags;
 223  223          db->db_shutdown_count = 0;
 224  224          cv_init(&db->db_shutdown_wait, NULL, CV_DEFAULT, NULL);
 225  225          return (db);
 226  226  }
 227  227  
 228  228  
 229  229  /*
 230  230   * The reaper threads that have been created for the tables in this
 231  231   * database must be stopped and the entries in the tables released.
  232  232   * Each table is marked as "shutdown" and the reaper threads are
  233  233   * poked; they then see that a shutdown is in progress, clean up,
  234  234   * and exit.  This function waits for all reaper threads to stop
 235  235   * before returning to the caller.
 236  236   */
 237  237  void
 238  238  rfs4_database_shutdown(rfs4_database_t *db)
 239  239  {
 240  240          rfs4_table_t *table;
 241  241  
 242  242          mutex_enter(db->db_lock);
 243  243          for (table = db->db_tables; table; table = table->dbt_tnext) {
 244  244                  mutex_enter(&table->dbt_reaper_cv_lock);
 245  245                  table->dbt_reaper_shutdown = TRUE;
 246  246                  cv_broadcast(&table->dbt_reaper_wait);
 247  247                  db->db_shutdown_count++;
 248  248                  mutex_exit(&table->dbt_reaper_cv_lock);
 249  249          }
 250  250          while (db->db_shutdown_count > 0) {
 251  251                  cv_wait(&db->db_shutdown_wait, db->db_lock);
 252  252          }
 253  253          mutex_exit(db->db_lock);
 254  254  }
 255  255  
 256  256  /*
  257  257   * Given a database that has been "shutdown" by the function above, all
  258  258   * of its tables are destroyed and then the database itself is
  259  259   * freed.
 260  260   */
 261  261  void
 262  262  rfs4_database_destroy(rfs4_database_t *db)
 263  263  {
 264  264          rfs4_table_t *next, *tmp;
 265  265  
 266  266          for (next = db->db_tables; next; ) {
 267  267                  tmp = next;
 268  268                  next = tmp->dbt_tnext;
 269  269                  rfs4_table_destroy(db, tmp);
 270  270          }
 271  271  
 272  272          mutex_destroy(db->db_lock);
 273  273          kmem_free(db, sizeof (rfs4_database_t));
 274  274  }
 275  275  
 276  276  /*
 277  277   * Used to get the correct kmem_cache database for the state table being
 278  278   * created.
 279  279   * Helper function for rfs4_table_create
 280  280   */
 281  281  static kmem_cache_t *
 282  282  get_db_mem_cache(char *name)
 283  283  {
 284  284          int i;
 285  285  
 286  286          for (i = 0; i < RFS4_DB_MEM_CACHE_NUM; i++) {
 287  287                  if (strcmp(name, rfs4_db_mem_cache_table[i].r_db_name) == 0)
 288  288                          return (rfs4_db_mem_cache_table[i].r_db_mem_cache);
 289  289          }
 290  290          /*
 291  291           * There is no associated kmem cache for this NFS4 server state
 292  292           * table name
 293  293           */
 294  294          return (NULL);
 295  295  }
 296  296  
 297  297  /*
 298  298   * Used to initialize the global NFSv4 server state database.
  299  299   * Helper function for rfs4_state_g_init and called when module is loaded.
 300  300   */
 301  301  kmem_cache_t *
 302  302  /* CSTYLED */
 303  303  nfs4_init_mem_cache(char *cache_name, uint32_t idxcnt, uint32_t size, uint32_t idx)
 304  304  {
 305  305          kmem_cache_t *mem_cache = kmem_cache_create(cache_name,
 306  306              sizeof (rfs4_dbe_t) + idxcnt * sizeof (rfs4_link_t) + size,
 307  307              0,
 308  308              rfs4_dbe_kmem_constructor,
 309  309              rfs4_dbe_kmem_destructor,
 310  310              NULL,
 311  311              NULL,
 312  312              NULL,
 313  313              0);
 314  314          (void) strlcpy(rfs4_db_mem_cache_table[idx].r_db_name, cache_name,
 315  315              strlen(cache_name) + 1);
 316  316          rfs4_db_mem_cache_table[idx].r_db_mem_cache = mem_cache;
 317  317          return (mem_cache);
 318  318  }
 319  319  
 320  320  rfs4_table_t *
 321  321  rfs4_table_create(rfs4_database_t *db, char *tabname, time_t max_cache_time,
 322  322      uint32_t idxcnt, bool_t (*create)(rfs4_entry_t, void *),
 323  323      void (*destroy)(rfs4_entry_t),
 324  324      bool_t (*expiry)(rfs4_entry_t),
 325  325      uint32_t size, uint32_t hashsize,
 326  326      uint32_t maxentries, id_t start)
 327  327  {
 328  328          rfs4_table_t    *table;
 329  329          int              len;
 330  330          char            *cache_name;
 331  331          char            *id_name;
 332  332  
 333  333          table = kmem_alloc(sizeof (rfs4_table_t), KM_SLEEP);
 334  334          table->dbt_db = db;
 335  335          rw_init(table->dbt_t_lock, NULL, RW_DEFAULT, NULL);
 336  336          mutex_init(table->dbt_lock, NULL, MUTEX_DEFAULT, NULL);
 337  337          mutex_init(&table->dbt_reaper_cv_lock, NULL, MUTEX_DEFAULT, NULL);
 338  338          cv_init(&table->dbt_reaper_wait, NULL, CV_DEFAULT, NULL);
 339  339  
 340  340          len = strlen(tabname);
 341  341          table->dbt_name = kmem_alloc(len+1, KM_SLEEP);
 342  342          cache_name = kmem_alloc(len + 12 /* "_entry_cache" */ + 1, KM_SLEEP);
 343  343          (void) strcpy(table->dbt_name, tabname);
 344  344          (void) sprintf(cache_name, "%s_entry_cache", table->dbt_name);
 345  345          table->dbt_max_cache_time = max_cache_time;
 346  346          table->dbt_usize = size;
 347  347          table->dbt_len = hashsize;
 348  348          table->dbt_count = 0;
 349  349          table->dbt_idxcnt = 0;
 350  350          table->dbt_ccnt = 0;
 351  351          table->dbt_maxcnt = idxcnt;
 352  352          table->dbt_indices = NULL;
 353  353          table->dbt_id_space = NULL;
 354  354          table->dbt_reaper_shutdown = FALSE;
 355  355  
 356  356          if (start >= 0) {
 357  357                  if (maxentries + (uint32_t)start > (uint32_t)INT32_MAX)
 358  358                          maxentries = INT32_MAX - start;
 359  359                  id_name = kmem_alloc(len + 9 /* "_id_space" */ + 1, KM_SLEEP);
 360  360                  (void) sprintf(id_name, "%s_id_space", table->dbt_name);
 361  361                  table->dbt_id_space = id_space_create(id_name, start,
 362  362                      maxentries + start);
 363  363                  kmem_free(id_name, len + 10);
 364  364          }
 365  365          ASSERT(t_lowat != 0);
 366  366          table->dbt_id_lwat = (maxentries * t_lowat) / 100;
 367  367          ASSERT(t_hiwat != 0);
 368  368          table->dbt_id_hwat = (maxentries * t_hiwat) / 100;
 369  369          table->dbt_id_reap = MIN(rfs4_reap_interval, max_cache_time);
 370  370          table->dbt_maxentries = maxentries;
 371  371          table->dbt_create = create;
 372  372          table->dbt_destroy = destroy;
 373  373          table->dbt_expiry = expiry;
 374  374  
 375  375          /*
 376  376           * get the correct kmem_cache for this table type based on the name.
 377  377           */
 378  378          table->dbt_mem_cache = get_db_mem_cache(cache_name);
 379  379  
 380  380          kmem_free(cache_name, len+13);
 381  381  
 382  382          table->dbt_debug = db->db_debug_flags;
 383  383  
 384  384          mutex_enter(db->db_lock);
 385  385          table->dbt_tnext = db->db_tables;
 386  386          db->db_tables = table;
 387  387          mutex_exit(db->db_lock);
 388  388  
 389  389          rfs4_start_reaper(table);
 390  390  
 391  391          return (table);
 392  392  }
 393  393  
 394  394  void
 395  395  rfs4_table_destroy(rfs4_database_t *db, rfs4_table_t *table)
 396  396  {
 397  397          rfs4_table_t *p;
 398  398          rfs4_index_t *idx;
 399  399  
 400  400          ASSERT(table->dbt_count == 0);
 401  401  
 402  402          mutex_enter(db->db_lock);
 403  403          if (table == db->db_tables)
 404  404                  db->db_tables = table->dbt_tnext;
 405  405          else {
 406  406                  for (p = db->db_tables; p; p = p->dbt_tnext)
 407  407                          if (p->dbt_tnext == table) {
 408  408                                  p->dbt_tnext = table->dbt_tnext;
 409  409                                  table->dbt_tnext = NULL;
 410  410                                  break;
 411  411                          }
 412  412                  ASSERT(p != NULL);
 413  413          }
 414  414          mutex_exit(db->db_lock);
 415  415  
 416  416          /* Destroy indices */
 417  417          while (table->dbt_indices) {
 418  418                  idx = table->dbt_indices;
 419  419                  table->dbt_indices = idx->dbi_inext;
 420  420                  rfs4_index_destroy(idx);
 421  421          }
 422  422  
 423  423          rw_destroy(table->dbt_t_lock);
 424  424          mutex_destroy(table->dbt_lock);
 425  425          mutex_destroy(&table->dbt_reaper_cv_lock);
 426  426          cv_destroy(&table->dbt_reaper_wait);
 427  427  
 428  428          kmem_free(table->dbt_name, strlen(table->dbt_name) + 1);
 429  429          if (table->dbt_id_space)
 430  430                  id_space_destroy(table->dbt_id_space);
 431  431          table->dbt_mem_cache = NULL;
 432  432          kmem_free(table, sizeof (rfs4_table_t));
 433  433  }
 434  434  
 435  435  rfs4_index_t *
 436  436  rfs4_index_create(rfs4_table_t *table, char *keyname,
 437  437      uint32_t (*hash)(void *),
 438  438      bool_t (compare)(rfs4_entry_t, void *),
 439  439      void *(*mkkey)(rfs4_entry_t),
 440  440      bool_t createable)
 441  441  {
 442  442          rfs4_index_t *idx;
 443  443  
 444  444          ASSERT(table->dbt_idxcnt < table->dbt_maxcnt);
 445  445  
 446  446          idx = kmem_alloc(sizeof (rfs4_index_t), KM_SLEEP);
 447  447  
 448  448          idx->dbi_table = table;
 449  449          idx->dbi_keyname = kmem_alloc(strlen(keyname) + 1, KM_SLEEP);
 450  450          (void) strcpy(idx->dbi_keyname, keyname);
 451  451          idx->dbi_hash = hash;
 452  452          idx->dbi_compare = compare;
 453  453          idx->dbi_mkkey = mkkey;
 454  454          idx->dbi_tblidx = table->dbt_idxcnt;
 455  455          table->dbt_idxcnt++;
 456  456          if (createable) {
 457  457                  table->dbt_ccnt++;
 458  458                  if (table->dbt_ccnt > 1)
 459  459                          panic("Table %s currently can have only have one "
 460  460                              "index that will allow creation of entries",
 461  461                              table->dbt_name);
 462  462                  idx->dbi_createable = TRUE;
 463  463          } else {
 464  464                  idx->dbi_createable = FALSE;
 465  465          }
 466  466  
 467  467          idx->dbi_inext = table->dbt_indices;
 468  468          table->dbt_indices = idx;
 469  469          idx->dbi_buckets = kmem_zalloc(sizeof (rfs4_bucket_t) * table->dbt_len,
 470  470              KM_SLEEP);
 471  471  
 472  472          return (idx);
 473  473  }
 474  474  
 475  475  void
 476  476  rfs4_index_destroy(rfs4_index_t *idx)
 477  477  {
 478  478          kmem_free(idx->dbi_keyname, strlen(idx->dbi_keyname) + 1);
 479  479          kmem_free(idx->dbi_buckets,
 480  480              sizeof (rfs4_bucket_t) * idx->dbi_table->dbt_len);
 481  481          kmem_free(idx, sizeof (rfs4_index_t));
 482  482  }
 483  483  
 484  484  static void
 485  485  rfs4_dbe_destroy(rfs4_dbe_t *entry)
 486  486  {
 487  487          rfs4_index_t *idx;
 488  488          void *key;
 489  489          int i;
 490  490          rfs4_bucket_t *bp;
 491  491          rfs4_table_t *table = entry->dbe_table;
 492  492          rfs4_link_t *l;
 493  493  
 494  494          NFS4_DEBUG(table->dbt_debug & DESTROY_DEBUG,
 495  495              (CE_NOTE, "Destroying entry %p from %s",
 496  496              (void*)entry, table->dbt_name));
 497  497  
 498  498          mutex_enter(entry->dbe_lock);
 499  499          ASSERT(entry->dbe_refcnt == 0);
 500  500          mutex_exit(entry->dbe_lock);
 501  501  
 502  502          /* Unlink from all indices */
 503  503          for (idx = table->dbt_indices; idx; idx = idx->dbi_inext) {
 504  504                  l = &entry->dbe_indices[idx->dbi_tblidx];
 505  505                  /* check and see if we were ever linked in to the index */
 506  506                  if (INVALID_LINK(l)) {
 507  507                          ASSERT(l->next == NULL && l->prev == NULL);
 508  508                          continue;
 509  509                  }
 510  510                  key = idx->dbi_mkkey(entry->dbe_data);
 511  511                  i = HASH(idx, key);
 512  512                  bp = &idx->dbi_buckets[i];
 513  513                  ASSERT(bp->dbk_head != NULL);
 514  514                  DEQUEUE_IDX(bp, &entry->dbe_indices[idx->dbi_tblidx]);
 515  515          }
 516  516  
 517  517          /* Destroy user data */
 518  518          if (table->dbt_destroy)
 519  519                  (*table->dbt_destroy)(entry->dbe_data);
 520  520  
 521  521          if (table->dbt_id_space)
 522  522                  id_free(table->dbt_id_space, entry->dbe_id);
 523  523  
 524  524          mutex_enter(table->dbt_lock);
 525  525          table->dbt_count--;
 526  526          mutex_exit(table->dbt_lock);
 527  527  
 528  528          /* Destroy the entry itself */
 529  529          kmem_cache_free(table->dbt_mem_cache, entry);
 530  530  }
 531  531  
 532  532  
 533  533  static rfs4_dbe_t *
 534  534  rfs4_dbe_create(rfs4_table_t *table, id_t id, rfs4_entry_t data)
 535  535  {
 536  536          rfs4_dbe_t *entry;
 537  537          int i;
 538  538  
 539  539          NFS4_DEBUG(table->dbt_debug & CREATE_DEBUG,
 540  540              (CE_NOTE, "Creating entry in table %s", table->dbt_name));
 541  541  
 542  542          entry = kmem_cache_alloc(table->dbt_mem_cache, KM_SLEEP);
 543  543  
 544  544          entry->dbe_refcnt = 1;
 545  545          entry->dbe_invalid = FALSE;
 546  546          entry->dbe_skipsearch = FALSE;
 547  547          entry->dbe_time_rele = 0;
 548  548          entry->dbe_id = 0;
 549  549  
 550  550          if (table->dbt_id_space)
 551  551                  entry->dbe_id = id;
 552  552          entry->dbe_table = table;
 553  553  
 554  554          for (i = 0; i < table->dbt_maxcnt; i++) {
 555  555                  entry->dbe_indices[i].next = entry->dbe_indices[i].prev = NULL;
 556  556                  entry->dbe_indices[i].entry = entry;
 557  557                  /*
 558  558                   * We mark the entry as not indexed by setting the low
  559  559                   * order bit, since addresses are word aligned. This has
  560  560                   * the advantage of causing a trap if the address is
 561  561                   * used. After the entry is linked in to the
 562  562                   * corresponding index the bit will be cleared.
 563  563                   */
 564  564                  INVALIDATE_ADDR(entry->dbe_indices[i].entry);
 565  565          }
 566  566  
 567  567          entry->dbe_data = (rfs4_entry_t)&entry->dbe_indices[table->dbt_maxcnt];
 568  568          bzero(entry->dbe_data, table->dbt_usize);
 569  569          entry->dbe_data->dbe = entry;
 570  570  
 571  571          if (!(*table->dbt_create)(entry->dbe_data, data)) {
 572  572                  kmem_cache_free(table->dbt_mem_cache, entry);
 573  573                  return (NULL);
 574  574          }
 575  575  
 576  576          mutex_enter(table->dbt_lock);
 577  577          table->dbt_count++;
 578  578          mutex_exit(table->dbt_lock);
 579  579  
 580  580          return (entry);
 581  581  }
 582  582  
 583  583  static void
 584  584  rfs4_dbe_tabreap_adjust(rfs4_table_t *table)
 585  585  {
 586  586          clock_t         tabreap;
 587  587          clock_t         reap_int;
 588  588          uint32_t        in_use;
 589  589  
 590  590          /*
 591  591           * Adjust the table's reap interval based on the
 592  592           * number of id's currently in use. Each table's
 593  593           * default remains the same if id usage subsides.
 594  594           */
 595  595          ASSERT(MUTEX_HELD(&table->dbt_reaper_cv_lock));
 596  596          tabreap = MIN(rfs4_reap_interval, table->dbt_max_cache_time);
 597  597  
 598  598          in_use = table->dbt_count + 1;  /* see rfs4_dbe_create */
 599  599          if (in_use >= table->dbt_id_hwat) {
 600  600                  ASSERT(t_hreap != 0);
 601  601                  reap_int = (tabreap * t_hreap) / 100;
 602  602          } else if (in_use >= table->dbt_id_lwat) {
 603  603                  ASSERT(t_lreap != 0);
 604  604                  reap_int = (tabreap * t_lreap) / 100;
 605  605          } else {
 606  606                  reap_int = tabreap;
 607  607          }
 608  608          table->dbt_id_reap = reap_int;
 609  609          DTRACE_PROBE2(table__reap__interval, char *,
 610  610              table->dbt_name, time_t, table->dbt_id_reap);
 611  611  }
 612  612  
 613  613  rfs4_entry_t
 614  614  rfs4_dbsearch(rfs4_index_t *idx, void *key, bool_t *create, void *arg,
 615  615      rfs4_dbsearch_type_t dbsearch_type)
 616  616  {
 617  617          int              already_done;
 618  618          uint32_t         i;
 619  619          rfs4_table_t    *table = idx->dbi_table;
 620  620          rfs4_index_t    *ip;
 621  621          rfs4_bucket_t   *bp;
 622  622          rfs4_link_t     *l;
 623  623          rfs4_dbe_t      *entry;
 624  624          id_t             id = -1;
 625  625  
 626  626          i = HASH(idx, key);
 627  627          bp = &idx->dbi_buckets[i];
 628  628  
 629  629          NFS4_DEBUG(table->dbt_debug & SEARCH_DEBUG,
 630  630              (CE_NOTE, "Searching for key %p in table %s by %s",
 631  631              key, table->dbt_name, idx->dbi_keyname));
 632  632  
 633  633          rw_enter(bp->dbk_lock, RW_READER);
 634  634  retry:
 635  635          for (l = bp->dbk_head; l; l = l->next) {
 636  636                  if (l->entry->dbe_refcnt > 0 &&
 637  637                      (l->entry->dbe_skipsearch == FALSE ||
 638  638                      (l->entry->dbe_skipsearch == TRUE &&
 639  639                      dbsearch_type == RFS4_DBS_INVALID)) &&
 640  640                      (*idx->dbi_compare)(l->entry->dbe_data, key)) {
 641  641                          mutex_enter(l->entry->dbe_lock);
 642  642                          if (l->entry->dbe_refcnt == 0) {
 643  643                                  mutex_exit(l->entry->dbe_lock);
 644  644                                  continue;
 645  645                          }
 646  646  
 647  647                          /* place an additional hold since we are returning */
 648  648                          rfs4_dbe_hold(l->entry);
 649  649  
 650  650                          mutex_exit(l->entry->dbe_lock);
 651  651                          rw_exit(bp->dbk_lock);
 652  652  
 653  653                          *create = FALSE;
 654  654  
 655  655                          NFS4_DEBUG((table->dbt_debug & SEARCH_DEBUG),
 656  656                              (CE_NOTE, "Found entry %p for %p in table %s",
 657  657                              (void *)l->entry, key, table->dbt_name));
 658  658  
 659  659                          if (id != -1)
 660  660                                  id_free(table->dbt_id_space, id);
 661  661                          return (l->entry->dbe_data);
 662  662                  }
 663  663          }
 664  664  
 665  665          if (!*create || table->dbt_create == NULL || !idx->dbi_createable ||
 666  666              table->dbt_maxentries == table->dbt_count) {
 667  667                  NFS4_DEBUG(table->dbt_debug & SEARCH_DEBUG,
 668  668                      (CE_NOTE, "Entry for %p in %s not found",
 669  669                      key, table->dbt_name));
 670  670  
 671  671                  rw_exit(bp->dbk_lock);
 672  672                  if (id != -1)
 673  673                          id_free(table->dbt_id_space, id);
 674  674                  return (NULL);
 675  675          }
 676  676  
 677  677          if (table->dbt_id_space && id == -1) {
 678  678                  rw_exit(bp->dbk_lock);
 679  679  
 680  680                  /* get an id, ok to sleep for it here */
 681  681                  id = id_alloc(table->dbt_id_space);
 682  682                  ASSERT(id != -1);
 683  683  
 684  684                  mutex_enter(&table->dbt_reaper_cv_lock);
 685  685                  rfs4_dbe_tabreap_adjust(table);
 686  686                  mutex_exit(&table->dbt_reaper_cv_lock);
 687  687  
 688  688                  rw_enter(bp->dbk_lock, RW_WRITER);
 689  689                  goto retry;
 690  690          }
 691  691  
 692  692          /* get an exclusive lock on the bucket */
 693  693          if (rw_read_locked(bp->dbk_lock) && !rw_tryupgrade(bp->dbk_lock)) {
 694  694                  NFS4_DEBUG(table->dbt_debug & OTHER_DEBUG,
 695  695                      (CE_NOTE, "Trying to upgrade lock on "
 696  696                      "hash chain %d (%p) for  %s by %s",
 697  697                      i, (void*)bp, table->dbt_name, idx->dbi_keyname));
 698  698  
 699  699                  rw_exit(bp->dbk_lock);
 700  700                  rw_enter(bp->dbk_lock, RW_WRITER);
 701  701                  goto retry;
 702  702          }
 703  703  
 704  704          /* create entry */
 705  705          entry = rfs4_dbe_create(table, id, arg);
 706  706          if (entry == NULL) {
 707  707                  rw_exit(bp->dbk_lock);
 708  708                  if (id != -1)
 709  709                          id_free(table->dbt_id_space, id);
 710  710  
 711  711                  NFS4_DEBUG(table->dbt_debug & CREATE_DEBUG,
 712  712                      (CE_NOTE, "Constructor for table %s failed",
 713  713                      table->dbt_name));
 714  714                  return (NULL);
 715  715          }
 716  716  
 717  717          /*
 718  718           * Add one ref for entry into table's hash - only one
 719  719           * reference added even though there may be multiple indices
 720  720           */
 721  721          rfs4_dbe_hold(entry);
 722  722          ENQUEUE(bp->dbk_head, &entry->dbe_indices[idx->dbi_tblidx]);
 723  723          VALIDATE_ADDR(entry->dbe_indices[idx->dbi_tblidx].entry);
 724  724  
 725  725          already_done = idx->dbi_tblidx;
 726  726          rw_exit(bp->dbk_lock);
 727  727  
 728  728          for (ip = table->dbt_indices; ip; ip = ip->dbi_inext) {
 729  729                  if (ip->dbi_tblidx == already_done)
 730  730                          continue;
 731  731                  l = &entry->dbe_indices[ip->dbi_tblidx];
 732  732                  i = HASH(ip, ip->dbi_mkkey(entry->dbe_data));
 733  733                  ASSERT(i < ip->dbi_table->dbt_len);
 734  734                  bp = &ip->dbi_buckets[i];
 735  735                  ENQUEUE_IDX(bp, l);
 736  736          }
 737  737  
 738  738          NFS4_DEBUG(
 739  739              table->dbt_debug & SEARCH_DEBUG || table->dbt_debug & CREATE_DEBUG,
 740  740              (CE_NOTE, "Entry %p created for %s = %p in table %s",
 741  741              (void*)entry, idx->dbi_keyname, (void*)key, table->dbt_name));
 742  742  
 743  743          return (entry->dbe_data);
 744  744  }
  
    | 
      ↓ open down ↓ | 
    744 lines elided | 
    
      ↑ open up ↑ | 
  
 745  745  
 746  746  /*ARGSUSED*/
 747  747  boolean_t
 748  748  rfs4_cpr_callb(void *arg, int code)
 749  749  {
 750  750          rfs4_bucket_t *buckets, *bp;
 751  751          rfs4_link_t *l;
 752  752          rfs4_client_t *cp;
 753  753          int i;
 754  754  
 755      -        nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
      755 +        nfs4_srv_t *nsrv4 = nfs4_get_srv();
 756  756          rfs4_table_t *table = nsrv4->rfs4_client_tab;
 757  757  
 758  758          /*
 759  759           * We get called for Suspend and Resume events.
 760  760           * For the suspend case we simply don't care!  Nor do we care if
 761  761           * there are no clients.
 762  762           */
 763  763          if (code == CB_CODE_CPR_CHKPT || table == NULL) {
 764  764                  return (B_TRUE);
 765  765          }
 766  766  
 767  767          buckets = table->dbt_indices->dbi_buckets;
 768  768  
 769  769          /*
 770  770           * When we get this far we are in the process of
 771  771           * resuming the system from a previous suspend.
 772  772           *
 773  773           * We are going to blast through and update the
 774  774           * last_access time for all the clients and in
 775  775           * doing so extend them by one lease period.
 776  776           */
 777  777          for (i = 0; i < table->dbt_len; i++) {
 778  778                  bp = &buckets[i];
 779  779                  for (l = bp->dbk_head; l; l = l->next) {
 780  780                          cp = (rfs4_client_t *)l->entry->dbe_data;
 781  781                          cp->rc_last_access = gethrestime_sec();
 782  782                  }
 783  783          }
 784  784  
 785  785          return (B_TRUE);
 786  786  }
 787  787  
 788  788  /*
 789  789   * Given a table, lock each of the buckets and walk all entries (in
 790  790   * turn locking those) and calling the provided "callout" function
 791  791   * with the provided parameter.  Obviously used to iterate across all
 792  792   * entries in a particular table via the database locking hierarchy.
 793  793   * Obviously the caller must not hold locks on any of the entries in
 794  794   * the specified table.
 795  795   */
 796  796  void
 797  797  rfs4_dbe_walk(rfs4_table_t *table,
 798  798      void (*callout)(rfs4_entry_t, void *),
 799  799      void *data)
 800  800  {
 801  801          rfs4_bucket_t *buckets = table->dbt_indices->dbi_buckets, *bp;
 802  802          rfs4_link_t *l;
 803  803          rfs4_dbe_t *entry;
 804  804          int i;
 805  805  
 806  806          NFS4_DEBUG(table->dbt_debug & WALK_DEBUG,
 807  807              (CE_NOTE, "Walking entries in %s", table->dbt_name));
 808  808  
 809  809          /* Walk the buckets looking for entries to release/destroy */
 810  810          for (i = 0; i < table->dbt_len; i++) {
 811  811                  bp = &buckets[i];
 812  812                  rw_enter(bp->dbk_lock, RW_READER);
 813  813                  for (l = bp->dbk_head; l; l = l->next) {
 814  814                          entry = l->entry;
 815  815                          mutex_enter(entry->dbe_lock);
 816  816                          (*callout)(entry->dbe_data, data);
 817  817                          mutex_exit(entry->dbe_lock);
 818  818                  }
 819  819                  rw_exit(bp->dbk_lock);
 820  820          }
 821  821  
 822  822          NFS4_DEBUG(table->dbt_debug & WALK_DEBUG,
 823  823              (CE_NOTE, "Walking entries complete %s", table->dbt_name));
 824  824  }
 825  825  
 826  826  
 827  827  static void
 828  828  rfs4_dbe_reap(rfs4_table_t *table, time_t cache_time, uint32_t desired)
 829  829  {
 830  830          rfs4_index_t *idx = table->dbt_indices;
 831  831          rfs4_bucket_t *buckets = idx->dbi_buckets, *bp;
 832  832          rfs4_link_t *l, *t;
 833  833          rfs4_dbe_t *entry;
 834  834          bool_t found;
 835  835          int i;
 836  836          int count = 0;
 837  837  
 838  838          NFS4_DEBUG(table->dbt_debug & REAP_DEBUG,
 839  839              (CE_NOTE, "Reaping %d entries older than %ld seconds in table %s",
 840  840              desired, cache_time, table->dbt_name));
 841  841  
 842  842          /* Walk the buckets looking for entries to release/destroy */
 843  843          for (i = 0; i < table->dbt_len; i++) {
 844  844                  int retries = 0;
 845  845                  bp = &buckets[i];
 846  846                  do {
 847  847                          found = FALSE;
 848  848                          rw_enter(bp->dbk_lock, RW_READER);
 849  849                          for (l = bp->dbk_head; l; l = l->next) {
 850  850                                  entry = l->entry;
 851  851                                  mutex_enter(entry->dbe_lock);
 852  852                                  ASSERT(entry->dbe_refcnt != 0);
 853  853                                  /*
 854  854                                   * Examine an entry.  Ref count of 1 means
 855  855                                   * that the only reference is for the hash
 856  856                                   * table reference.
 857  857                                   */
 858  858                                  if (entry->dbe_refcnt != 1) {
 859  859  #ifdef DEBUG
 860  860                                          rfs4_dbe_debug(entry);
 861  861  #endif
 862  862                                          mutex_exit(entry->dbe_lock);
 863  863                                          continue;
 864  864                                  }
 865  865                                  if ((entry->dbe_refcnt == 1) &&
 866  866                                      (table->dbt_reaper_shutdown ||
 867  867                                      table->dbt_expiry == NULL ||
 868  868                                      (*table->dbt_expiry)(entry->dbe_data))) {
 869  869                                          rfs4_dbe_rele_nolock(entry);
 870  870                                          count++;
 871  871                                          found = TRUE;
 872  872                                  }
 873  873                                  mutex_exit(entry->dbe_lock);
 874  874                          }
 875  875                          if (found) {
 876  876                                  if (!rw_tryupgrade(bp->dbk_lock)) {
 877  877                                          rw_exit(bp->dbk_lock);
 878  878                                          rw_enter(bp->dbk_lock, RW_WRITER);
 879  879                                  }
 880  880  
 881  881                                  l = bp->dbk_head;
 882  882                                  while (l) {
 883  883                                          t = l;
 884  884                                          entry = t->entry;
 885  885                                          l = l->next;
 886  886                                          mutex_enter(entry->dbe_lock);
 887  887                                          if (entry->dbe_refcnt == 0) {
 888  888                                                  DEQUEUE(bp->dbk_head, t);
 889  889                                                  mutex_exit(entry->dbe_lock);
 890  890                                                  t->next = NULL;
 891  891                                                  t->prev = NULL;
 892  892                                                  INVALIDATE_ADDR(t->entry);
 893  893                                                  rfs4_dbe_destroy(entry);
 894  894                                          } else
 895  895                                                  mutex_exit(entry->dbe_lock);
 896  896                                  }
 897  897                          }
 898  898                          rw_exit(bp->dbk_lock);
 899  899                          /*
 900  900                           * delay slightly if there is more work to do
 901  901                           * with the expectation that other reaper
 902  902                           * threads are freeing data structures as well
 903  903                           * and in turn will reduce ref counts on
 904  904                           * entries in this table allowing them to be
 905  905                           * released.  This is only done in the
 906  906                           * instance that the tables are being shut down.
 907  907                           */
 908  908                          if (table->dbt_reaper_shutdown && bp->dbk_head != NULL) {
 909  909                                  delay(hz/100);
 910  910                                  retries++;
 911  911                          }
 912  912                  /*
 913  913                   * If this is a table shutdown, keep going until
 914  914                   * everything is gone
 915  915                   */
 916  916                  } while (table->dbt_reaper_shutdown && bp->dbk_head != NULL && retries < 5);
 917  917  
 918  918                  if (!table->dbt_reaper_shutdown && desired && count >= desired)
 919  919                          break;
 920  920          }
 921  921  
 922  922          NFS4_DEBUG(table->dbt_debug & REAP_DEBUG,
 923  923              (CE_NOTE, "Reaped %d entries older than %ld seconds in table %s",
 924  924              count, cache_time, table->dbt_name));
 925  925  }
 926  926  
 927  927  static void
 928  928  reaper_thread(caddr_t *arg)
 929  929  {
 930  930          rfs4_table_t    *table = (rfs4_table_t *)arg;
 931  931          clock_t          rc;
 932  932  
 933  933          NFS4_DEBUG(table->dbt_debug,
 934  934              (CE_NOTE, "rfs4_reaper_thread starting for %s", table->dbt_name));
 935  935  
 936  936          CALLB_CPR_INIT(&table->dbt_reaper_cpr_info, &table->dbt_reaper_cv_lock,
 937  937              callb_generic_cpr, "nfsv4Reaper");
 938  938  
 939  939          mutex_enter(&table->dbt_reaper_cv_lock);
 940  940          do {
 941  941                  CALLB_CPR_SAFE_BEGIN(&table->dbt_reaper_cpr_info);
 942  942                  rc = cv_reltimedwait_sig(&table->dbt_reaper_wait,
 943  943                      &table->dbt_reaper_cv_lock,
 944  944                      SEC_TO_TICK(table->dbt_id_reap), TR_CLOCK_TICK);
 945  945                  CALLB_CPR_SAFE_END(&table->dbt_reaper_cpr_info,
 946  946                      &table->dbt_reaper_cv_lock);
 947  947                  rfs4_dbe_reap(table, table->dbt_max_cache_time, 0);
 948  948          } while (rc != 0 && table->dbt_reaper_shutdown == FALSE);
 949  949  
 950  950          CALLB_CPR_EXIT(&table->dbt_reaper_cpr_info);
 951  951  
 952  952          NFS4_DEBUG(table->dbt_debug,
 953  953              (CE_NOTE, "rfs4_reaper_thread exiting for %s", table->dbt_name));
 954  954  
 955  955          /* Notify the database shutdown processing that the table is shutdown */
 956  956          mutex_enter(table->dbt_db->db_lock);
 957  957          table->dbt_db->db_shutdown_count--;
 958  958          cv_signal(&table->dbt_db->db_shutdown_wait);
 959  959          mutex_exit(table->dbt_db->db_lock);
 960  960          zthread_exit();
 961  961  }
 962  962  
 963  963  static void
 964  964  rfs4_start_reaper(rfs4_table_t *table)
 965  965  {
 966  966          if (table->dbt_max_cache_time == 0)
 967  967                  return;
 968  968  
 969  969          (void) zthread_create(NULL, 0, reaper_thread, table, 0,
 970  970              minclsyspri);
 971  971  }
 972  972  
 973  973  #ifdef DEBUG
 974  974  void
 975  975  rfs4_dbe_debug(rfs4_dbe_t *entry)
 976  976  {
 977  977          cmn_err(CE_NOTE, "Entry %p from table %s",
 978  978              (void *)entry, entry->dbe_table->dbt_name);
 979  979          cmn_err(CE_CONT, "\trefcnt = %d id = %d",
 980  980              entry->dbe_refcnt, entry->dbe_id);
 981  981  }
 982  982  #endif
  
    | 
      ↓ open down ↓ | 
    217 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX