Print this page
    
Revert "NEX-20260 NFS hung in transitional state when RSF marks it maintenance"
This reverts commit 9bf6e5f740709f470ba350df64cd9f2c93f3f0a7.
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/fs/nfs/nfs4_db.c
          +++ new/usr/src/uts/common/fs/nfs/nfs4_db.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  
    | 
      ↓ open down ↓ | 
    16 lines elided | 
    
      ↑ open up ↑ | 
  
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   */
  25   25  
  26   26  /*
  27      - * Copyright 2019 Nexenta Systems, Inc.
       27 + * Copyright 2018 Nexenta Systems, Inc.
  28   28   */
  29   29  
  30   30  #include <sys/systm.h>
  31   31  #include <sys/cmn_err.h>
  32   32  #include <sys/kmem.h>
  33   33  #include <sys/disp.h>
  34   34  #include <sys/id_space.h>
       35 +#include <sys/atomic.h>
  35   36  #include <rpc/rpc.h>
  36   37  #include <nfs/nfs4.h>
  37   38  #include <nfs/nfs4_db_impl.h>
  38   39  #include <sys/sdt.h>
  39   40  
  40   41  static int rfs4_reap_interval = RFS4_REAP_INTERVAL;
  41   42  
  42   43  static void rfs4_dbe_reap(rfs4_table_t *, time_t, uint32_t);
  43   44  static void rfs4_dbe_destroy(rfs4_dbe_t *);
  44   45  static rfs4_dbe_t *rfs4_dbe_create(rfs4_table_t *, id_t, rfs4_entry_t);
  45   46  static void rfs4_start_reaper(rfs4_table_t *);
  46   47  
  47   48  /*
  48   49   * t_lowat - integer percentage of table entries        /etc/system only
  49   50   * t_hiwat - integer percentage of table entries        /etc/system only
  50   51   * t_lreap - integer percentage of table reap time      mdb or /etc/system
  51   52   * t_hreap - integer percentage of table reap time      mdb or /etc/system
  52   53   */
  53   54  uint32_t        t_lowat = 50;   /* reap at t_lreap when id's in use hit 50% */
  54   55  uint32_t        t_hiwat = 75;   /* reap at t_hreap when id's in use hit 75% */
  55   56  time_t          t_lreap = 50;   /* default to 50% of table's reap interval */
  56   57  time_t          t_hreap = 10;   /* default to 10% of table's reap interval */
  
    | 
      ↓ open down ↓ | 
    12 lines elided | 
    
      ↑ open up ↑ | 
  
  57   58  
  58   59  id_t
  59   60  rfs4_dbe_getid(rfs4_dbe_t *entry)
  60   61  {
  61   62          return (entry->dbe_id);
  62   63  }
  63   64  
  64   65  void
  65   66  rfs4_dbe_hold(rfs4_dbe_t *entry)
  66   67  {
  67      -        if (!MUTEX_HELD(entry->dbe_lock)) {
  68      -                mutex_enter(entry->dbe_lock);
  69      -                entry->dbe_refcnt++;
  70      -                mutex_exit(entry->dbe_lock);
  71      -        } else {
  72      -                entry->dbe_refcnt++;
  73      -        }
       68 +        atomic_inc_32(&entry->dbe_refcnt);
  74   69  }
  75   70  
  76   71  /*
  77   72   * rfs4_dbe_rele_nolock only decrements the reference count of the entry.
  78   73   */
  79   74  void
  80   75  rfs4_dbe_rele_nolock(rfs4_dbe_t *entry)
  81   76  {
  82      -        if (!MUTEX_HELD(entry->dbe_lock)) {
  83      -                ASSERT(entry->dbe_refcnt > 0);
  84      -                mutex_enter(entry->dbe_lock);
  85      -                entry->dbe_refcnt--;
  86      -                mutex_exit(entry->dbe_lock);
  87      -        } else {
  88      -                entry->dbe_refcnt--;
  89      -        }
       77 +        atomic_dec_32(&entry->dbe_refcnt);
  90   78  }
  91   79  
  92   80  
  93   81  uint32_t
  94   82  rfs4_dbe_refcnt(rfs4_dbe_t *entry)
  95   83  {
  96   84          return (entry->dbe_refcnt);
  97   85  }
  98   86  
  99   87  /*
 100   88   * Mark an entry such that the dbsearch will skip it.
 101   89   * Caller does not want this entry to be found any longer
 102   90   */
 103   91  void
 104   92  rfs4_dbe_invalidate(rfs4_dbe_t *entry)
 105   93  {
 106      -        if (!MUTEX_HELD(entry->dbe_lock)) {
 107      -                mutex_enter(entry->dbe_lock);
 108      -                entry->dbe_invalid = TRUE;
 109      -                entry->dbe_skipsearch = TRUE;
 110      -                mutex_exit(entry->dbe_lock);
 111      -        } else {
 112      -                entry->dbe_invalid = TRUE;
 113      -                entry->dbe_skipsearch = TRUE;
 114      -        }
       94 +        entry->dbe_invalid = TRUE;
       95 +        entry->dbe_skipsearch = TRUE;
 115   96  }
 116   97  
 117   98  /*
 118   99   * Is this entry invalid?
 119  100   */
 120  101  bool_t
 121  102  rfs4_dbe_is_invalid(rfs4_dbe_t *entry)
 122  103  {
 123  104          return (entry->dbe_invalid);
 124  105  }
 125  106  
 126  107  time_t
 127  108  rfs4_dbe_get_timerele(rfs4_dbe_t *entry)
 128  109  {
 129  110          return (entry->dbe_time_rele);
 130  111  }
 131  112  
 132  113  /*
 133  114   * Use these to temporarily hide/unhide a db entry.
 134  115   */
 135  116  void
 136  117  rfs4_dbe_hide(rfs4_dbe_t *entry)
 137  118  {
 138  119          rfs4_dbe_lock(entry);
 139  120          entry->dbe_skipsearch = TRUE;
 140  121          rfs4_dbe_unlock(entry);
 141  122  }
 142  123  
 143  124  void
 144  125  rfs4_dbe_unhide(rfs4_dbe_t *entry)
 145  126  {
  
    | 
      ↓ open down ↓ | 
    21 lines elided | 
    
      ↑ open up ↑ | 
  
 146  127          rfs4_dbe_lock(entry);
 147  128          entry->dbe_skipsearch = FALSE;
 148  129          rfs4_dbe_unlock(entry);
 149  130  }
 150  131  
 151  132  void
 152  133  rfs4_dbe_rele(rfs4_dbe_t *entry)
 153  134  {
 154  135          mutex_enter(entry->dbe_lock);
 155  136          ASSERT(entry->dbe_refcnt > 1);
 156      -        entry->dbe_refcnt--;
      137 +        atomic_dec_32(&entry->dbe_refcnt);
 157  138          entry->dbe_time_rele = gethrestime_sec();
 158  139          mutex_exit(entry->dbe_lock);
 159  140  }
 160  141  
 161  142  void
 162  143  rfs4_dbe_lock(rfs4_dbe_t *entry)
 163  144  {
 164  145          mutex_enter(entry->dbe_lock);
 165  146  }
 166  147  
 167  148  void
 168  149  rfs4_dbe_unlock(rfs4_dbe_t *entry)
 169  150  {
 170  151          mutex_exit(entry->dbe_lock);
 171  152  }
 172  153  
 173  154  bool_t
 174  155  rfs4_dbe_islocked(rfs4_dbe_t *entry)
 175  156  {
 176  157          return (mutex_owned(entry->dbe_lock));
 177  158  }
 178  159  
 179  160  clock_t
 180  161  rfs4_dbe_twait(rfs4_dbe_t *entry, clock_t timeout)
 181  162  {
 182  163          return (cv_timedwait(entry->dbe_cv, entry->dbe_lock, timeout));
 183  164  }
 184  165  
 185  166  void
 186  167  rfs4_dbe_cv_broadcast(rfs4_dbe_t *entry)
 187  168  {
 188  169          cv_broadcast(entry->dbe_cv);
 189  170  }
 190  171  
 191  172  /* ARGSUSED */
 192  173  static int
 193  174  rfs4_dbe_kmem_constructor(void *obj, void *private, int kmflag)
 194  175  {
 195  176          rfs4_dbe_t *entry = obj;
 196  177  
 197  178          mutex_init(entry->dbe_lock, NULL, MUTEX_DEFAULT, NULL);
 198  179          cv_init(entry->dbe_cv, NULL, CV_DEFAULT, NULL);
 199  180  
 200  181          return (0);
 201  182  }
 202  183  
 203  184  static void
 204  185  rfs4_dbe_kmem_destructor(void *obj, void *private)
 205  186  {
 206  187          rfs4_dbe_t *entry = obj;
 207  188          /*LINTED*/
 208  189          rfs4_table_t *table = private;
 209  190  
 210  191          mutex_destroy(entry->dbe_lock);
 211  192          cv_destroy(entry->dbe_cv);
 212  193  }
 213  194  
 214  195  rfs4_database_t *
 215  196  rfs4_database_create(uint32_t flags)
 216  197  {
 217  198          rfs4_database_t *db;
 218  199  
 219  200          db = kmem_alloc(sizeof (rfs4_database_t), KM_SLEEP);
 220  201          mutex_init(db->db_lock, NULL, MUTEX_DEFAULT, NULL);
 221  202          db->db_tables = NULL;
 222  203          db->db_debug_flags = flags;
 223  204          db->db_shutdown_count = 0;
 224  205          cv_init(&db->db_shutdown_wait, NULL, CV_DEFAULT, NULL);
 225  206          return (db);
 226  207  }
 227  208  
 228  209  
 229  210  /*
 230  211   * The reaper threads that have been created for the tables in this
 231  212   * database must be stopped and the entries in the tables released.
 232  213   * Each table will be marked as "shutdown" and the reaper threads
 233  214   * poked and they will see that a shutdown is in progress and cleanup
 234  215   * and exit.  This function waits for all reaper threads to stop
 235  216   * before returning to the caller.
 236  217   */
 237  218  void
 238  219  rfs4_database_shutdown(rfs4_database_t *db)
 239  220  {
 240  221          rfs4_table_t *table;
 241  222  
 242  223          mutex_enter(db->db_lock);
 243  224          for (table = db->db_tables; table; table = table->dbt_tnext) {
 244  225                  mutex_enter(&table->dbt_reaper_cv_lock);
 245  226                  table->dbt_reaper_shutdown = TRUE;
 246  227                  cv_broadcast(&table->dbt_reaper_wait);
 247  228                  db->db_shutdown_count++;
 248  229                  mutex_exit(&table->dbt_reaper_cv_lock);
 249  230          }
 250  231          while (db->db_shutdown_count > 0) {
 251  232                  cv_wait(&db->db_shutdown_wait, db->db_lock);
 252  233          }
 253  234          mutex_exit(db->db_lock);
 254  235  }
 255  236  
 256  237  /*
 257  238   * Given a database that has been "shutdown" by the function above all
 258  239   * of its tables are destroyed and then the database itself
 259  240   * freed.
 260  241   */
 261  242  void
 262  243  rfs4_database_destroy(rfs4_database_t *db)
 263  244  {
 264  245          rfs4_table_t *next, *tmp;
 265  246  
 266  247          for (next = db->db_tables; next; ) {
 267  248                  tmp = next;
 268  249                  next = tmp->dbt_tnext;
 269  250                  rfs4_table_destroy(db, tmp);
 270  251          }
 271  252  
 272  253          mutex_destroy(db->db_lock);
 273  254          kmem_free(db, sizeof (rfs4_database_t));
 274  255  }
 275  256  
 276  257  /*
 277  258   * Used to get the correct kmem_cache database for the state table being
 278  259   * created.
 279  260   * Helper function for rfs4_table_create
 280  261   */
 281  262  static kmem_cache_t *
 282  263  get_db_mem_cache(char *name)
 283  264  {
 284  265          int i;
 285  266  
 286  267          for (i = 0; i < RFS4_DB_MEM_CACHE_NUM; i++) {
 287  268                  if (strcmp(name, rfs4_db_mem_cache_table[i].r_db_name) == 0)
 288  269                          return (rfs4_db_mem_cache_table[i].r_db_mem_cache);
 289  270          }
 290  271          /*
 291  272           * There is no associated kmem cache for this NFS4 server state
 292  273           * table name
 293  274           */
 294  275          return (NULL);
 295  276  }
 296  277  
 297  278  /*
 298  279   * Used to initialize the global NFSv4 server state database.
 299  280   * Helper function for rfs4_state_g_init and called when module is loaded.
 300  281   */
 301  282  kmem_cache_t *
 302  283  /* CSTYLED */
 303  284  nfs4_init_mem_cache(char *cache_name, uint32_t idxcnt, uint32_t size, uint32_t idx)
 304  285  {
 305  286          kmem_cache_t *mem_cache = kmem_cache_create(cache_name,
 306  287              sizeof (rfs4_dbe_t) + idxcnt * sizeof (rfs4_link_t) + size,
 307  288              0,
 308  289              rfs4_dbe_kmem_constructor,
 309  290              rfs4_dbe_kmem_destructor,
 310  291              NULL,
 311  292              NULL,
 312  293              NULL,
 313  294              0);
 314  295          (void) strlcpy(rfs4_db_mem_cache_table[idx].r_db_name, cache_name,
 315  296              strlen(cache_name) + 1);
 316  297          rfs4_db_mem_cache_table[idx].r_db_mem_cache = mem_cache;
 317  298          return (mem_cache);
 318  299  }
 319  300  
 320  301  rfs4_table_t *
 321  302  rfs4_table_create(rfs4_database_t *db, char *tabname, time_t max_cache_time,
 322  303      uint32_t idxcnt, bool_t (*create)(rfs4_entry_t, void *),
 323  304      void (*destroy)(rfs4_entry_t),
 324  305      bool_t (*expiry)(rfs4_entry_t),
 325  306      uint32_t size, uint32_t hashsize,
 326  307      uint32_t maxentries, id_t start)
 327  308  {
 328  309          rfs4_table_t    *table;
 329  310          int              len;
 330  311          char            *cache_name;
 331  312          char            *id_name;
 332  313  
 333  314          table = kmem_alloc(sizeof (rfs4_table_t), KM_SLEEP);
 334  315          table->dbt_db = db;
 335  316          rw_init(table->dbt_t_lock, NULL, RW_DEFAULT, NULL);
 336  317          mutex_init(table->dbt_lock, NULL, MUTEX_DEFAULT, NULL);
 337  318          mutex_init(&table->dbt_reaper_cv_lock, NULL, MUTEX_DEFAULT, NULL);
 338  319          cv_init(&table->dbt_reaper_wait, NULL, CV_DEFAULT, NULL);
 339  320  
 340  321          len = strlen(tabname);
 341  322          table->dbt_name = kmem_alloc(len+1, KM_SLEEP);
 342  323          cache_name = kmem_alloc(len + 12 /* "_entry_cache" */ + 1, KM_SLEEP);
 343  324          (void) strcpy(table->dbt_name, tabname);
 344  325          (void) sprintf(cache_name, "%s_entry_cache", table->dbt_name);
 345  326          table->dbt_max_cache_time = max_cache_time;
 346  327          table->dbt_usize = size;
 347  328          table->dbt_len = hashsize;
 348  329          table->dbt_count = 0;
 349  330          table->dbt_idxcnt = 0;
 350  331          table->dbt_ccnt = 0;
 351  332          table->dbt_maxcnt = idxcnt;
 352  333          table->dbt_indices = NULL;
 353  334          table->dbt_id_space = NULL;
 354  335          table->dbt_reaper_shutdown = FALSE;
 355  336  
 356  337          if (start >= 0) {
 357  338                  if (maxentries + (uint32_t)start > (uint32_t)INT32_MAX)
 358  339                          maxentries = INT32_MAX - start;
 359  340                  id_name = kmem_alloc(len + 9 /* "_id_space" */ + 1, KM_SLEEP);
 360  341                  (void) sprintf(id_name, "%s_id_space", table->dbt_name);
 361  342                  table->dbt_id_space = id_space_create(id_name, start,
 362  343                      maxentries + start);
 363  344                  kmem_free(id_name, len + 10);
 364  345          }
 365  346          ASSERT(t_lowat != 0);
 366  347          table->dbt_id_lwat = (maxentries * t_lowat) / 100;
 367  348          ASSERT(t_hiwat != 0);
 368  349          table->dbt_id_hwat = (maxentries * t_hiwat) / 100;
 369  350          table->dbt_id_reap = MIN(rfs4_reap_interval, max_cache_time);
 370  351          table->dbt_maxentries = maxentries;
 371  352          table->dbt_create = create;
 372  353          table->dbt_destroy = destroy;
 373  354          table->dbt_expiry = expiry;
 374  355  
 375  356          /*
 376  357           * get the correct kmem_cache for this table type based on the name.
 377  358           */
 378  359          table->dbt_mem_cache = get_db_mem_cache(cache_name);
 379  360  
 380  361          kmem_free(cache_name, len+13);
 381  362  
 382  363          table->dbt_debug = db->db_debug_flags;
 383  364  
 384  365          mutex_enter(db->db_lock);
 385  366          table->dbt_tnext = db->db_tables;
 386  367          db->db_tables = table;
 387  368          mutex_exit(db->db_lock);
 388  369  
 389  370          rfs4_start_reaper(table);
 390  371  
 391  372          return (table);
 392  373  }
 393  374  
 394  375  void
 395  376  rfs4_table_destroy(rfs4_database_t *db, rfs4_table_t *table)
 396  377  {
 397  378          rfs4_table_t *p;
 398  379          rfs4_index_t *idx;
 399  380  
 400  381          ASSERT(table->dbt_count == 0);
 401  382  
 402  383          mutex_enter(db->db_lock);
 403  384          if (table == db->db_tables)
 404  385                  db->db_tables = table->dbt_tnext;
 405  386          else {
 406  387                  for (p = db->db_tables; p; p = p->dbt_tnext)
 407  388                          if (p->dbt_tnext == table) {
 408  389                                  p->dbt_tnext = table->dbt_tnext;
 409  390                                  table->dbt_tnext = NULL;
 410  391                                  break;
 411  392                          }
 412  393                  ASSERT(p != NULL);
 413  394          }
 414  395          mutex_exit(db->db_lock);
 415  396  
 416  397          /* Destroy indices */
 417  398          while (table->dbt_indices) {
 418  399                  idx = table->dbt_indices;
 419  400                  table->dbt_indices = idx->dbi_inext;
 420  401                  rfs4_index_destroy(idx);
 421  402          }
 422  403  
 423  404          rw_destroy(table->dbt_t_lock);
 424  405          mutex_destroy(table->dbt_lock);
 425  406          mutex_destroy(&table->dbt_reaper_cv_lock);
 426  407          cv_destroy(&table->dbt_reaper_wait);
 427  408  
 428  409          kmem_free(table->dbt_name, strlen(table->dbt_name) + 1);
 429  410          if (table->dbt_id_space)
 430  411                  id_space_destroy(table->dbt_id_space);
 431  412          table->dbt_mem_cache = NULL;
 432  413          kmem_free(table, sizeof (rfs4_table_t));
 433  414  }
 434  415  
 435  416  rfs4_index_t *
 436  417  rfs4_index_create(rfs4_table_t *table, char *keyname,
 437  418      uint32_t (*hash)(void *),
 438  419      bool_t (compare)(rfs4_entry_t, void *),
 439  420      void *(*mkkey)(rfs4_entry_t),
 440  421      bool_t createable)
 441  422  {
 442  423          rfs4_index_t *idx;
 443  424  
 444  425          ASSERT(table->dbt_idxcnt < table->dbt_maxcnt);
 445  426  
 446  427          idx = kmem_alloc(sizeof (rfs4_index_t), KM_SLEEP);
 447  428  
 448  429          idx->dbi_table = table;
 449  430          idx->dbi_keyname = kmem_alloc(strlen(keyname) + 1, KM_SLEEP);
 450  431          (void) strcpy(idx->dbi_keyname, keyname);
 451  432          idx->dbi_hash = hash;
 452  433          idx->dbi_compare = compare;
 453  434          idx->dbi_mkkey = mkkey;
 454  435          idx->dbi_tblidx = table->dbt_idxcnt;
 455  436          table->dbt_idxcnt++;
 456  437          if (createable) {
 457  438                  table->dbt_ccnt++;
 458  439                  if (table->dbt_ccnt > 1)
 459  440                          panic("Table %s currently can have only have one "
 460  441                              "index that will allow creation of entries",
 461  442                              table->dbt_name);
 462  443                  idx->dbi_createable = TRUE;
 463  444          } else {
 464  445                  idx->dbi_createable = FALSE;
 465  446          }
 466  447  
 467  448          idx->dbi_inext = table->dbt_indices;
 468  449          table->dbt_indices = idx;
 469  450          idx->dbi_buckets = kmem_zalloc(sizeof (rfs4_bucket_t) * table->dbt_len,
 470  451              KM_SLEEP);
 471  452  
 472  453          return (idx);
 473  454  }
 474  455  
 475  456  void
 476  457  rfs4_index_destroy(rfs4_index_t *idx)
 477  458  {
 478  459          kmem_free(idx->dbi_keyname, strlen(idx->dbi_keyname) + 1);
 479  460          kmem_free(idx->dbi_buckets,
 480  461              sizeof (rfs4_bucket_t) * idx->dbi_table->dbt_len);
 481  462          kmem_free(idx, sizeof (rfs4_index_t));
 482  463  }
 483  464  
 484  465  static void
 485  466  rfs4_dbe_destroy(rfs4_dbe_t *entry)
 486  467  {
 487  468          rfs4_index_t *idx;
 488  469          void *key;
 489  470          int i;
 490  471          rfs4_bucket_t *bp;
 491  472          rfs4_table_t *table = entry->dbe_table;
 492  473          rfs4_link_t *l;
 493  474  
 494  475          NFS4_DEBUG(table->dbt_debug & DESTROY_DEBUG,
 495  476              (CE_NOTE, "Destroying entry %p from %s",
 496  477              (void*)entry, table->dbt_name));
 497  478  
 498  479          mutex_enter(entry->dbe_lock);
 499  480          ASSERT(entry->dbe_refcnt == 0);
 500  481          mutex_exit(entry->dbe_lock);
 501  482  
 502  483          /* Unlink from all indices */
 503  484          for (idx = table->dbt_indices; idx; idx = idx->dbi_inext) {
 504  485                  l = &entry->dbe_indices[idx->dbi_tblidx];
 505  486                  /* check and see if we were ever linked in to the index */
 506  487                  if (INVALID_LINK(l)) {
 507  488                          ASSERT(l->next == NULL && l->prev == NULL);
 508  489                          continue;
 509  490                  }
 510  491                  key = idx->dbi_mkkey(entry->dbe_data);
 511  492                  i = HASH(idx, key);
 512  493                  bp = &idx->dbi_buckets[i];
 513  494                  ASSERT(bp->dbk_head != NULL);
 514  495                  DEQUEUE_IDX(bp, &entry->dbe_indices[idx->dbi_tblidx]);
 515  496          }
 516  497  
 517  498          /* Destroy user data */
 518  499          if (table->dbt_destroy)
 519  500                  (*table->dbt_destroy)(entry->dbe_data);
 520  501  
 521  502          if (table->dbt_id_space)
 522  503                  id_free(table->dbt_id_space, entry->dbe_id);
 523  504  
 524  505          mutex_enter(table->dbt_lock);
 525  506          table->dbt_count--;
 526  507          mutex_exit(table->dbt_lock);
 527  508  
 528  509          /* Destroy the entry itself */
 529  510          kmem_cache_free(table->dbt_mem_cache, entry);
 530  511  }
 531  512  
 532  513  
 533  514  static rfs4_dbe_t *
 534  515  rfs4_dbe_create(rfs4_table_t *table, id_t id, rfs4_entry_t data)
 535  516  {
 536  517          rfs4_dbe_t *entry;
 537  518          int i;
 538  519  
 539  520          NFS4_DEBUG(table->dbt_debug & CREATE_DEBUG,
 540  521              (CE_NOTE, "Creating entry in table %s", table->dbt_name));
 541  522  
 542  523          entry = kmem_cache_alloc(table->dbt_mem_cache, KM_SLEEP);
 543  524  
 544  525          entry->dbe_refcnt = 1;
 545  526          entry->dbe_invalid = FALSE;
 546  527          entry->dbe_skipsearch = FALSE;
 547  528          entry->dbe_time_rele = 0;
 548  529          entry->dbe_id = 0;
 549  530  
 550  531          if (table->dbt_id_space)
 551  532                  entry->dbe_id = id;
 552  533          entry->dbe_table = table;
 553  534  
 554  535          for (i = 0; i < table->dbt_maxcnt; i++) {
 555  536                  entry->dbe_indices[i].next = entry->dbe_indices[i].prev = NULL;
 556  537                  entry->dbe_indices[i].entry = entry;
 557  538                  /*
 558  539                   * We mark the entry as not indexed by setting the low
 559  540                   * order bit, since addresses are word aligned. This has
 560  541                   * the advantage of causing a trap if the address is
 561  542                   * used. After the entry is linked in to the
 562  543                   * corresponding index the bit will be cleared.
 563  544                   */
 564  545                  INVALIDATE_ADDR(entry->dbe_indices[i].entry);
 565  546          }
 566  547  
 567  548          entry->dbe_data = (rfs4_entry_t)&entry->dbe_indices[table->dbt_maxcnt];
 568  549          bzero(entry->dbe_data, table->dbt_usize);
 569  550          entry->dbe_data->dbe = entry;
 570  551  
 571  552          if (!(*table->dbt_create)(entry->dbe_data, data)) {
 572  553                  kmem_cache_free(table->dbt_mem_cache, entry);
 573  554                  return (NULL);
 574  555          }
 575  556  
 576  557          mutex_enter(table->dbt_lock);
 577  558          table->dbt_count++;
 578  559          mutex_exit(table->dbt_lock);
 579  560  
 580  561          return (entry);
 581  562  }
 582  563  
 583  564  static void
 584  565  rfs4_dbe_tabreap_adjust(rfs4_table_t *table)
 585  566  {
 586  567          clock_t         tabreap;
 587  568          clock_t         reap_int;
 588  569          uint32_t        in_use;
 589  570  
 590  571          /*
 591  572           * Adjust the table's reap interval based on the
 592  573           * number of id's currently in use. Each table's
 593  574           * default remains the same if id usage subsides.
 594  575           */
 595  576          ASSERT(MUTEX_HELD(&table->dbt_reaper_cv_lock));
 596  577          tabreap = MIN(rfs4_reap_interval, table->dbt_max_cache_time);
 597  578  
 598  579          in_use = table->dbt_count + 1;  /* see rfs4_dbe_create */
 599  580          if (in_use >= table->dbt_id_hwat) {
 600  581                  ASSERT(t_hreap != 0);
 601  582                  reap_int = (tabreap * t_hreap) / 100;
 602  583          } else if (in_use >= table->dbt_id_lwat) {
 603  584                  ASSERT(t_lreap != 0);
 604  585                  reap_int = (tabreap * t_lreap) / 100;
 605  586          } else {
 606  587                  reap_int = tabreap;
 607  588          }
 608  589          table->dbt_id_reap = reap_int;
 609  590          DTRACE_PROBE2(table__reap__interval, char *,
 610  591              table->dbt_name, time_t, table->dbt_id_reap);
 611  592  }
 612  593  
 613  594  rfs4_entry_t
 614  595  rfs4_dbsearch(rfs4_index_t *idx, void *key, bool_t *create, void *arg,
 615  596      rfs4_dbsearch_type_t dbsearch_type)
 616  597  {
 617  598          int              already_done;
 618  599          uint32_t         i;
 619  600          rfs4_table_t    *table = idx->dbi_table;
 620  601          rfs4_index_t    *ip;
 621  602          rfs4_bucket_t   *bp;
 622  603          rfs4_link_t     *l;
 623  604          rfs4_dbe_t      *entry;
 624  605          id_t             id = -1;
 625  606  
 626  607          i = HASH(idx, key);
 627  608          bp = &idx->dbi_buckets[i];
 628  609  
 629  610          NFS4_DEBUG(table->dbt_debug & SEARCH_DEBUG,
 630  611              (CE_NOTE, "Searching for key %p in table %s by %s",
 631  612              key, table->dbt_name, idx->dbi_keyname));
 632  613  
 633  614          rw_enter(bp->dbk_lock, RW_READER);
 634  615  retry:
 635  616          for (l = bp->dbk_head; l; l = l->next) {
 636  617                  if (l->entry->dbe_refcnt > 0 &&
 637  618                      (l->entry->dbe_skipsearch == FALSE ||
 638  619                      (l->entry->dbe_skipsearch == TRUE &&
 639  620                      dbsearch_type == RFS4_DBS_INVALID)) &&
 640  621                      (*idx->dbi_compare)(l->entry->dbe_data, key)) {
 641  622                          mutex_enter(l->entry->dbe_lock);
 642  623                          if (l->entry->dbe_refcnt == 0) {
 643  624                                  mutex_exit(l->entry->dbe_lock);
 644  625                                  continue;
 645  626                          }
 646  627  
 647  628                          /* place an additional hold since we are returning */
 648  629                          rfs4_dbe_hold(l->entry);
 649  630  
 650  631                          mutex_exit(l->entry->dbe_lock);
 651  632                          rw_exit(bp->dbk_lock);
 652  633  
 653  634                          *create = FALSE;
 654  635  
 655  636                          NFS4_DEBUG((table->dbt_debug & SEARCH_DEBUG),
 656  637                              (CE_NOTE, "Found entry %p for %p in table %s",
 657  638                              (void *)l->entry, key, table->dbt_name));
 658  639  
 659  640                          if (id != -1)
 660  641                                  id_free(table->dbt_id_space, id);
 661  642                          return (l->entry->dbe_data);
 662  643                  }
 663  644          }
 664  645  
 665  646          if (!*create || table->dbt_create == NULL || !idx->dbi_createable ||
 666  647              table->dbt_maxentries == table->dbt_count) {
 667  648                  NFS4_DEBUG(table->dbt_debug & SEARCH_DEBUG,
 668  649                      (CE_NOTE, "Entry for %p in %s not found",
 669  650                      key, table->dbt_name));
 670  651  
 671  652                  rw_exit(bp->dbk_lock);
 672  653                  if (id != -1)
 673  654                          id_free(table->dbt_id_space, id);
 674  655                  return (NULL);
 675  656          }
 676  657  
 677  658          if (table->dbt_id_space && id == -1) {
 678  659                  rw_exit(bp->dbk_lock);
 679  660  
 680  661                  /* get an id, ok to sleep for it here */
 681  662                  id = id_alloc(table->dbt_id_space);
 682  663                  ASSERT(id != -1);
 683  664  
 684  665                  mutex_enter(&table->dbt_reaper_cv_lock);
 685  666                  rfs4_dbe_tabreap_adjust(table);
 686  667                  mutex_exit(&table->dbt_reaper_cv_lock);
 687  668  
 688  669                  rw_enter(bp->dbk_lock, RW_WRITER);
 689  670                  goto retry;
 690  671          }
 691  672  
 692  673          /* get an exclusive lock on the bucket */
 693  674          if (rw_read_locked(bp->dbk_lock) && !rw_tryupgrade(bp->dbk_lock)) {
 694  675                  NFS4_DEBUG(table->dbt_debug & OTHER_DEBUG,
 695  676                      (CE_NOTE, "Trying to upgrade lock on "
 696  677                      "hash chain %d (%p) for  %s by %s",
 697  678                      i, (void*)bp, table->dbt_name, idx->dbi_keyname));
 698  679  
 699  680                  rw_exit(bp->dbk_lock);
 700  681                  rw_enter(bp->dbk_lock, RW_WRITER);
 701  682                  goto retry;
 702  683          }
 703  684  
 704  685          /* create entry */
 705  686          entry = rfs4_dbe_create(table, id, arg);
 706  687          if (entry == NULL) {
 707  688                  rw_exit(bp->dbk_lock);
 708  689                  if (id != -1)
 709  690                          id_free(table->dbt_id_space, id);
 710  691  
 711  692                  NFS4_DEBUG(table->dbt_debug & CREATE_DEBUG,
 712  693                      (CE_NOTE, "Constructor for table %s failed",
 713  694                      table->dbt_name));
 714  695                  return (NULL);
 715  696          }
 716  697  
 717  698          /*
 718  699           * Add one ref for entry into table's hash - only one
 719  700           * reference added even though there may be multiple indices
 720  701           */
 721  702          rfs4_dbe_hold(entry);
 722  703          ENQUEUE(bp->dbk_head, &entry->dbe_indices[idx->dbi_tblidx]);
 723  704          VALIDATE_ADDR(entry->dbe_indices[idx->dbi_tblidx].entry);
 724  705  
 725  706          already_done = idx->dbi_tblidx;
 726  707          rw_exit(bp->dbk_lock);
 727  708  
 728  709          for (ip = table->dbt_indices; ip; ip = ip->dbi_inext) {
 729  710                  if (ip->dbi_tblidx == already_done)
 730  711                          continue;
 731  712                  l = &entry->dbe_indices[ip->dbi_tblidx];
 732  713                  i = HASH(ip, ip->dbi_mkkey(entry->dbe_data));
 733  714                  ASSERT(i < ip->dbi_table->dbt_len);
 734  715                  bp = &ip->dbi_buckets[i];
 735  716                  ENQUEUE_IDX(bp, l);
 736  717          }
 737  718  
 738  719          NFS4_DEBUG(
 739  720              table->dbt_debug & SEARCH_DEBUG || table->dbt_debug & CREATE_DEBUG,
 740  721              (CE_NOTE, "Entry %p created for %s = %p in table %s",
 741  722              (void*)entry, idx->dbi_keyname, (void*)key, table->dbt_name));
 742  723  
 743  724          return (entry->dbe_data);
 744  725  }
 745  726  
           /*
            * CPR (suspend/resume) callback for the NFSv4 server's client table.
            *
            * arg  - unused.
            * code - callback code; CB_CODE_CPR_CHKPT is treated as the suspend
            *        event and is ignored.
            *
            * For any other code, every entry chained on the buckets of the index
            * at the head of the client table's dbt_indices list has its
            * rc_last_access set to the current time.  Always returns B_TRUE.
            *
            * NOTE(review): the bucket chains are walked here without taking
            * dbk_lock or dbe_lock -- presumably safe because of the context in
            * which the CPR framework invokes this callback; confirm.
            */
 746  727  /*ARGSUSED*/
 747  728  boolean_t
 748  729  rfs4_cpr_callb(void *arg, int code)
 749  730  {
 750  731          rfs4_bucket_t *buckets, *bp;
 751  732          rfs4_link_t *l;
 752  733          rfs4_client_t *cp;
 753  734          int i;
 754  735  
 755  736          nfs4_srv_t *nsrv4 = nfs4_get_srv();
 756  737          rfs4_table_t *table = nsrv4->rfs4_client_tab;
 757  738  
 758  739          /*
 759  740           * We get called for Suspend and Resume events.
 760  741           * For the suspend case we simply don't care!  Nor do we care if
 761  742           * there are no clients.
 762  743           */
 763  744          if (code == CB_CODE_CPR_CHKPT || table == NULL) {
 764  745                  return (B_TRUE);
 765  746          }
 766  747  
                   /* Only the first index's buckets are needed to reach the entries. */
 767  748          buckets = table->dbt_indices->dbi_buckets;
 768  749  
 769  750          /*
 770  751           * When we get this far we are in the process of
 771  752           * resuming the system from a previous suspend.
 772  753           *
 773  754           * We are going to blast through and update the
 774  755           * last_access time for all the clients and in
 775  756           * doing so extend them by one lease period.
 776  757           */
 777  758          for (i = 0; i < table->dbt_len; i++) {
 778  759                  bp = &buckets[i];
 779  760                  for (l = bp->dbk_head; l; l = l->next) {
                                   /* Entries in this table carry an rfs4_client_t. */
 780  761                          cp = (rfs4_client_t *)l->entry->dbe_data;
 781  762                          cp->rc_last_access = gethrestime_sec();
 782  763                  }
 783  764          }
 784  765  
 785  766          return (B_TRUE);
 786  767  }
 787  768  
 788  769  /*
 789  770   * Given a table, take each bucket's lock as reader and walk all of its
 790  771   * entries, locking each entry in turn and calling the provided
 791  772   * "callout" function with the entry's data and the provided parameter.
 792  773   * Used to iterate across all entries in a particular table via the
 793  774   * database locking hierarchy.  The caller must not hold locks on any
 794  775   * of the entries in the specified table.
            *
            * table   - table to walk; the buckets of the index at the head of
            *           dbt_indices are used.
            * callout - invoked once per entry while the bucket's dbk_lock (as
            *           reader) and the entry's dbe_lock are both held.
            * data    - opaque argument handed to callout unchanged.
 795  776   */
 796  777  void
 797  778  rfs4_dbe_walk(rfs4_table_t *table,
 798  779      void (*callout)(rfs4_entry_t, void *),
 799  780      void *data)
 800  781  {
 801  782          rfs4_bucket_t *buckets = table->dbt_indices->dbi_buckets, *bp;
 802  783          rfs4_link_t *l;
 803  784          rfs4_dbe_t *entry;
 804  785          int i;
 805  786  
 806  787          NFS4_DEBUG(table->dbt_debug & WALK_DEBUG,
 807  788              (CE_NOTE, "Walking entries in %s", table->dbt_name));
 808  789  
 809  790          /* Walk the buckets, invoking the callout on every entry */
 810  791          for (i = 0; i < table->dbt_len; i++) {
 811  792                  bp = &buckets[i];
 812  793                  rw_enter(bp->dbk_lock, RW_READER);
 813  794                  for (l = bp->dbk_head; l; l = l->next) {
 814  795                          entry = l->entry;
                                   /* The callout runs with the entry locked. */
 815  796                          mutex_enter(entry->dbe_lock);
 816  797                          (*callout)(entry->dbe_data, data);
 817  798                          mutex_exit(entry->dbe_lock);
 818  799                  }
 819  800                  rw_exit(bp->dbk_lock);
 820  801          }
 821  802  
 822  803          NFS4_DEBUG(table->dbt_debug & WALK_DEBUG,
 823  804              (CE_NOTE, "Walking entries complete %s", table->dbt_name));
 824  805  }
 825  806  
 826  807  
           /*
            * Reap entries from a table.
            *
            * table      - table to reap; the buckets of the index at the head of
            *              dbt_indices are walked.
            * cache_time - only reported in the REAP_DEBUG messages here; the
            *              expiry decision itself is made by table->dbt_expiry.
            * desired    - if non-zero and the table is not shutting down, stop
            *              after the bucket in which the running count of reaped
            *              entries reaches this value; 0 means visit every bucket.
            *
            * Each bucket is processed in two passes.  First, with the bucket lock
            * held as reader, every entry whose only reference is the hash table's
            * (dbe_refcnt == 1) and which is eligible (table shutting down, no
            * dbt_expiry function, or dbt_expiry returns true) has that reference
            * dropped.  Second, if anything was marked, the bucket lock is taken
            * as writer and every entry whose dbe_refcnt is 0 is unlinked and
            * destroyed.
            *
            * NOTE(review): this listing is a diff; lines marked '-' and '+' are
            * the two alternative versions of the same statements.
            */
 827  808  static void
 828  809  rfs4_dbe_reap(rfs4_table_t *table, time_t cache_time, uint32_t desired)
 829  810  {
 830  811          rfs4_index_t *idx = table->dbt_indices;
 831  812          rfs4_bucket_t *buckets = idx->dbi_buckets, *bp;
 832  813          rfs4_link_t *l, *t;
 833  814          rfs4_dbe_t *entry;
 
    | 
      ↓ open down ↓ | 
    667 lines elided | 
    
      ↑ open up ↑ | 
 
 834  815          bool_t found;
 835  816          int i;
 836  817          int count = 0;
 837  818  
 838  819          NFS4_DEBUG(table->dbt_debug & REAP_DEBUG,
 839  820              (CE_NOTE, "Reaping %d entries older than %ld seconds in table %s",
 840  821              desired, cache_time, table->dbt_name));
 841  822  
 842  823          /* Walk the buckets looking for entries to release/destroy */
 843  824          for (i = 0; i < table->dbt_len; i++) {
 844      -                int retries = 0;
 845  825                  bp = &buckets[i];
 846  826                  do {
 847  827                          found = FALSE;
 848  828                          rw_enter(bp->dbk_lock, RW_READER);
 849  829                          for (l = bp->dbk_head; l; l = l->next) {
 850  830                                  entry = l->entry;
 851      -                                mutex_enter(entry->dbe_lock);
 852      -                                ASSERT(entry->dbe_refcnt != 0);
 853  831                                  /*
 854  832                                   * Examine an entry.  Ref count of 1 means
 855  833                                   * that the only reference is for the hash
 856  834                                   * table reference.
 857  835                                   */
 858      -                                if (entry->dbe_refcnt != 1) {
 859      -                                        mutex_exit(entry->dbe_lock);
      836 +                                if (entry->dbe_refcnt != 1)
 860  837                                          continue;
 861      -                                }
      838 +                                mutex_enter(entry->dbe_lock);
                                           /*
                                            * The count is tested again below, which
                                            * in the '+' form is the first test made
                                            * with dbe_lock held.
                                            */
 862  839                                  if ((entry->dbe_refcnt == 1) &&
 863  840                                      (table->dbt_reaper_shutdown ||
 864  841                                      table->dbt_expiry == NULL ||
 865  842                                      (*table->dbt_expiry)(entry->dbe_data))) {
 866      -                                        rfs4_dbe_rele_nolock(entry);
      843 +                                        entry->dbe_refcnt--;
 867  844                                          count++;
 868  845                                          found = TRUE;
 869  846                                  }
 870  847                                  mutex_exit(entry->dbe_lock);
 871  848                          }
 872  849                          if (found) {
                                           /*
                                            * Unlinking needs the bucket lock as
                                            * writer; if the in-place upgrade fails
                                            * the lock is dropped and re-acquired.
                                            */
 873  850                                  if (!rw_tryupgrade(bp->dbk_lock)) {
 874  851                                          rw_exit(bp->dbk_lock);
 875  852                                          rw_enter(bp->dbk_lock, RW_WRITER);
 876  853                                  }
 877  854  
 878  855                                  l = bp->dbk_head;
 879  856                                  while (l) {
                                                   /*
                                                    * Advance before a possible
                                                    * dequeue, which clears t->next.
                                                    */
 880  857                                          t = l;
 881  858                                          entry = t->entry;
 882  859                                          l = l->next;
 883      -                                        mutex_enter(entry->dbe_lock);
 884  860                                          if (entry->dbe_refcnt == 0) {
 885  861                                                  DEQUEUE(bp->dbk_head, t);
 886      -                                                mutex_exit(entry->dbe_lock);
 887  862                                                  t->next = NULL;
 888  863                                                  t->prev = NULL;
 889  864                                                  INVALIDATE_ADDR(t->entry);
 890  865                                                  rfs4_dbe_destroy(entry);
 891      -                                        } else
 892      -                                                mutex_exit(entry->dbe_lock);
      866 +                                        }
 893  867                                  }
 894  868                          }
 895  869                          rw_exit(bp->dbk_lock);
 896  870                          /*
 897  871                           * delay slightly if there is more work to do
 898  872                           * with the expectation that other reaper
 899  873                           * threads are freeing data structures as well
 900  874                           * and in turn will reduce ref counts on
 901  875                           * entries in this table allowing them to be
 902  876                           * released.  This is only done in the
 903  877                           * instance that the tables are being shut down.
 904  878                           */
 905      -                        if (table->dbt_reaper_shutdown && bp->dbk_head != NULL) {
      879 +                        if (table->dbt_reaper_shutdown && bp->dbk_head != NULL)
 906  880                                  delay(hz/100);
 907      -                                retries++;
 908      -                        }
 909  881                  /*
 910  882                   * If this is a table shutdown, keep going until
 911  883                   * everything is gone
 912  884                   */
                           /*
                            * NOTE(review): the '-' form gives up on a bucket after 5
                            * delayed retries; the '+' form has no bound and repeats
                            * until the bucket is empty.
                            */
 913      -                } while (table->dbt_reaper_shutdown && bp->dbk_head != NULL && retries < 5);
      885 +                } while (table->dbt_reaper_shutdown && bp->dbk_head != NULL);
 914  886  
                           /* The reap quota is only checked between buckets. */
 915  887                  if (!table->dbt_reaper_shutdown && desired && count >= desired)
 916  888                          break;
 917  889          }
 918  890  
 919  891          NFS4_DEBUG(table->dbt_debug & REAP_DEBUG,
 920  892              (CE_NOTE, "Reaped %d entries older than %ld seconds in table %s",
 921  893              count, cache_time, table->dbt_name));
 922  894  }
 923  895  
           /*
            * Body of a table's reaper thread.
            *
            * arg - the rfs4_table_t to service (passed as an opaque pointer).
            *
            * Repeatedly waits on dbt_reaper_wait for up to dbt_id_reap seconds
            * and then calls rfs4_dbe_reap() on the table.  The loop ends once the
            * wait returns 0 or dbt_reaper_shutdown is set; the reap for that last
            * iteration has already run by then.  On the way out the thread
            * decrements the database's db_shutdown_count, signals
            * db_shutdown_wait, and exits via zthread_exit().
            */
 924  896  static void
 925  897  reaper_thread(caddr_t *arg)
 926  898  {
 927  899          rfs4_table_t    *table = (rfs4_table_t *)arg;
 928  900          clock_t          rc;
 929  901  
 930  902          NFS4_DEBUG(table->dbt_debug,
 931  903              (CE_NOTE, "rfs4_reaper_thread starting for %s", table->dbt_name));
 932  904  
 933  905          CALLB_CPR_INIT(&table->dbt_reaper_cpr_info, &table->dbt_reaper_cv_lock,
 934  906              callb_generic_cpr, "nfsv4Reaper");
 935  907  
                   /*
                    * No matching mutex_exit() appears in this function, so
                    * rfs4_dbe_reap() below is called with dbt_reaper_cv_lock held.
                    * NOTE(review): CALLB_CPR_EXIT is presumably what drops the
                    * lock -- confirm against the callb CPR macros.
                    */
 936  908          mutex_enter(&table->dbt_reaper_cv_lock);
 937  909          do {
 938  910                  CALLB_CPR_SAFE_BEGIN(&table->dbt_reaper_cpr_info);
 939  911                  rc = cv_reltimedwait_sig(&table->dbt_reaper_wait,
 940  912                      &table->dbt_reaper_cv_lock,
 941  913                      SEC_TO_TICK(table->dbt_id_reap), TR_CLOCK_TICK);
 942  914                  CALLB_CPR_SAFE_END(&table->dbt_reaper_cpr_info,
 943  915                      &table->dbt_reaper_cv_lock);
                           /* desired == 0: no limit on the number of entries reaped. */
 944  916                  rfs4_dbe_reap(table, table->dbt_max_cache_time, 0);
                   /*
                    * NOTE(review): rc == 0 presumably means the wait was interrupted
                    * by a signal -- confirm against cv_reltimedwait_sig(9F).
                    */
 945  917          } while (rc != 0 && table->dbt_reaper_shutdown == FALSE);
 946  918  
 947  919          CALLB_CPR_EXIT(&table->dbt_reaper_cpr_info);
 948  920  
 949  921          NFS4_DEBUG(table->dbt_debug,
 950  922              (CE_NOTE, "rfs4_reaper_thread exiting for %s", table->dbt_name));
 951  923  
 952  924          /* Notify the database shutdown processing that the table is shutdown */
 953  925          mutex_enter(table->dbt_db->db_lock);
 954  926          table->dbt_db->db_shutdown_count--;
 955  927          cv_signal(&table->dbt_db->db_shutdown_wait);
 956  928          mutex_exit(table->dbt_db->db_lock);
 957  929          zthread_exit();
 958  930  }
 959  931  
           /*
            * Start the reaper thread for a table.
            *
            * No thread is started when the table's dbt_max_cache_time is 0.
            * Otherwise reaper_thread() is launched via zthread_create() at
            * minclsyspri with the table as its argument; the zthread_create()
            * result is discarded.
            */
 960  932  static void
 961  933  rfs4_start_reaper(rfs4_table_t *table)
 962  934  {
 963  935          if (table->dbt_max_cache_time == 0)
 964  936                  return;
 965  937  
 966  938          (void) zthread_create(NULL, 0, reaper_thread, table, 0,
 967  939              minclsyspri);
 968  940  }
 969  941  
 970  942  #ifdef DEBUG
           /*
            * Debug-only helper (compiled only when DEBUG is defined): log the
            * entry's address, the name of the table it belongs to, and its
            * reference count and id via cmn_err().  Fields are read without
            * taking the entry's lock.
            */
 971  943  void
 972  944  rfs4_dbe_debug(rfs4_dbe_t *entry)
 973  945  {
 974  946          cmn_err(CE_NOTE, "Entry %p from table %s",
 975  947              (void *)entry, entry->dbe_table->dbt_name);
                   /* CE_CONT continues the preceding message. */
 976  948          cmn_err(CE_CONT, "\trefcnt = %d id = %d",
 977  949              entry->dbe_refcnt, entry->dbe_id);
 978  950  }
 979  951  #endif
  
    | 
      ↓ open down ↓ | 
    56 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX