Print this page
NEX-15279 support NFS server in zone
NEX-15520 online NFS shares cause zoneadm halt to hang in nfs_export_zone_fini
Portions contributed by: Dan Kruchinin dan.kruchinin@nexenta.com
Portions contributed by: Stepan Zastupov stepan.zastupov@gmail.com
Reviewed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/fs/sharefs/sharetab.c
          +++ new/usr/src/uts/common/fs/sharefs/sharetab.c
↓ open down ↓ 15 lines elided ↑ open up ↑
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   */
  25   25  
       26 +/*
       27 + * Copyright 2018 Nexenta Systems, Inc.
       28 + */
       29 +
  26   30  #include <sys/types.h>
  27   31  #include <sys/types32.h>
  28   32  #include <sys/param.h>
  29   33  #include <sys/systm.h>
  30   34  #include <rpc/types.h>
  31   35  #include <sys/vfs.h>
  32   36  #include <sys/siginfo.h>
  33   37  #include <sys/proc.h>           /* for exit() declaration */
  34   38  #include <sys/kmem.h>
  35   39  #include <sys/pathname.h>
↓ open down ↓ 4 lines elided ↑ open up ↑
  40   44  #include <sys/policy.h>
  41   45  
  42   46  #include <sharefs/sharefs.h>
  43   47  
  44   48  /*
  45   49   * A macro to avoid cut-and-paste errors on getting a string field
  46   50   * from user-land.
  47   51   */
  48   52  #define SHARETAB_COPYIN(field)                                          \
  49   53          if (copyinstr(STRUCT_FGETP(u_sh, sh_##field),                   \
  50      -                        buf,                                            \
  51      -                        bufsz + 1,      /* Add one for extra NUL */     \
  52      -                        &len)) {                                        \
       54 +            buf,                                                        \
       55 +            bufsz + 1,  /* Add one for extra NUL */                     \
       56 +            &len)) {                                                    \
  53   57                  error = EFAULT;                                         \
  54   58                  goto cleanup;                                           \
  55   59          }                                                               \
  56      -        /*                                                              \
  57      -         * Need to remove 1 because copyinstr() counts the NUL.         \
  58      -         */                                                             \
       60 +        /* Need to remove 1 because copyinstr() counts the NUL */       \
  59   61          len--;                                                          \
  60   62          sh->sh_##field = kmem_alloc(len + 1, KM_SLEEP);                 \
  61   63          bcopy(buf, sh->sh_##field, len);                                \
  62   64          sh->sh_##field[len] = '\0';                                     \
  63   65          shl.shl_##field = (int)len;                                     \
  64   66          sh->sh_size += shl.shl_##field; /* Debug counting */
  65   67  
  66   68  #define SHARETAB_DELETE_FIELD(field)                                    \
  67      -        if (sh->sh_##field) {                                           \
       69 +        if (sh->sh_##field != NULL) {                                   \
  68   70                  kmem_free(sh->sh_##field,                               \
  69      -                        shl ? shl->shl_##field + 1 :                    \
  70      -                        strlen(sh->sh_##field) + 1);                    \
       71 +                    shl ? shl->shl_##field + 1 :                        \
       72 +                    strlen(sh->sh_##field) + 1);                        \
  71   73          }
  72   74  
  73      -sharetab_t      *sharefs_sharetab = NULL;       /* The incore sharetab. */
  74      -size_t          sharetab_size;
  75      -uint_t          sharetab_count;
       75 +static zone_key_t sharetab_zone_key;
  76   76  
  77      -krwlock_t       sharetab_lock;  /* lock to protect the cached sharetab */
  78      -
  79      -krwlock_t       sharefs_lock;   /* lock to protect the vnode ops */
  80      -
  81      -timestruc_t     sharetab_mtime;
  82      -timestruc_t     sharetab_snap_time;
  83      -
  84      -uint_t          sharetab_generation;    /* Only increments and wraps! */
  85      -
  86   77  /*
  87   78   * Take care of cleaning up a share.
  88   79   * If passed in a length array, use it to determine how much
  89   80   * space to clean up. Else, figure that out.
  90   81   */
  91   82  static void
  92   83  sharefree(share_t *sh, sharefs_lens_t *shl)
  93   84  {
  94      -        if (!sh)
       85 +        if (sh == NULL)
  95   86                  return;
  96   87  
  97   88          SHARETAB_DELETE_FIELD(path);
  98   89          SHARETAB_DELETE_FIELD(res);
  99   90          SHARETAB_DELETE_FIELD(fstype);
 100   91          SHARETAB_DELETE_FIELD(opts);
 101   92          SHARETAB_DELETE_FIELD(descr);
 102   93  
 103      -        kmem_free(sh, sizeof (share_t));
       94 +        kmem_free(sh, sizeof (*sh));
 104   95  }
 105   96  
 106   97  /*
 107   98   * If there is no error, then this function is responsible for
 108   99   * cleaning up the memory associated with the share argument.
 109  100   */
 110  101  static int
 111      -sharefs_remove(share_t *sh, sharefs_lens_t *shl)
      102 +sharefs_remove(sharetab_globals_t *sg, share_t *sh, sharefs_lens_t *shl)
 112  103  {
 113  104          int             iHash;
 114  105          sharetab_t      *sht;
 115  106          share_t         *s, *p;
 116  107          int             iPath;
 117  108  
 118  109          if (!sh)
 119  110                  return (ENOENT);
 120  111  
 121      -        rw_enter(&sharetab_lock, RW_WRITER);
 122      -        for (sht = sharefs_sharetab; sht != NULL; sht = sht->s_next) {
 123      -                if (strcmp(sh->sh_fstype, sht->s_fstype) == 0) {
      112 +        rw_enter(&sg->sharetab_lock, RW_WRITER);
      113 +        for (sht = sg->sharefs_sharetab; sht != NULL; sht = sht->s_next) {
      114 +                if (strcmp(sh->sh_fstype, sht->s_fstype) == 0)
 124  115                          break;
 125      -                }
 126  116          }
 127  117  
 128  118          /*
 129  119           * There does not exist a fstype in memory which
 130  120           * matches the share passed in.
 131  121           */
 132      -        if (!sht) {
 133      -                rw_exit(&sharetab_lock);
      122 +        if (sht == NULL) {
      123 +                rw_exit(&sg->sharetab_lock);
 134  124                  return (ENOENT);
 135  125          }
 136  126  
 137      -        iPath = shl ? shl->shl_path : strlen(sh->sh_path);
      127 +        iPath = shl != NULL ? shl->shl_path : strlen(sh->sh_path);
 138  128          iHash = pkp_tab_hash(sh->sh_path, strlen(sh->sh_path));
 139  129  
 140  130          /*
 141  131           * Now walk down the hash table and find the entry to free!
 142  132           */
 143  133          for (p = NULL, s = sht->s_buckets[iHash].ssh_sh;
 144  134              s != NULL; s = s->sh_next) {
 145  135                  /*
 146  136                   * We need exact matches.
 147  137                   */
 148  138                  if (strcmp(sh->sh_path, s->sh_path) == 0 &&
 149  139                      strlen(s->sh_path) == iPath) {
 150      -                        if (p) {
      140 +                        if (p != NULL)
 151  141                                  p->sh_next = s->sh_next;
 152      -                        } else {
      142 +                        else
 153  143                                  sht->s_buckets[iHash].ssh_sh = s->sh_next;
 154      -                        }
 155  144  
 156  145                          ASSERT(sht->s_buckets[iHash].ssh_count != 0);
 157  146                          atomic_dec_32(&sht->s_buckets[iHash].ssh_count);
 158  147                          atomic_dec_32(&sht->s_count);
 159      -                        atomic_dec_32(&sharetab_count);
      148 +                        atomic_dec_32(&sg->sharetab_count);
 160  149  
 161      -                        ASSERT(sharetab_size >= s->sh_size);
 162      -                        sharetab_size -= s->sh_size;
      150 +                        ASSERT(sg->sharetab_size >= s->sh_size);
      151 +                        sg->sharetab_size -= s->sh_size;
 163  152  
 164      -                        gethrestime(&sharetab_mtime);
 165      -                        atomic_inc_32(&sharetab_generation);
      153 +                        gethrestime(&sg->sharetab_mtime);
      154 +                        atomic_inc_32(&sg->sharetab_generation);
 166  155  
 167  156                          break;
 168  157                  }
 169  158  
 170  159                  p = s;
 171  160          }
 172  161  
 173      -        rw_exit(&sharetab_lock);
      162 +        rw_exit(&sg->sharetab_lock);
 174  163  
 175      -        if (!s) {
      164 +        if (s == NULL)
 176  165                  return (ENOENT);
 177      -        }
 178  166  
 179  167          s->sh_next = NULL;
 180  168          sharefree(s, NULL);
 181  169  
 182      -        /*
 183      -         * We need to free the share for the caller.
 184      -         */
      170 +        /* We need to free the share for the caller */
 185  171          sharefree(sh, shl);
 186  172  
 187  173          return (0);
 188  174  }
 189  175  
 190  176  /*
 191  177   * The caller must have allocated memory for us to use.
 192  178   */
 193  179  static int
 194      -sharefs_add(share_t *sh, sharefs_lens_t *shl)
      180 +sharefs_add(sharetab_globals_t *sg, share_t *sh, sharefs_lens_t *shl)
 195  181  {
 196  182          int             iHash;
 197  183          sharetab_t      *sht;
 198  184          share_t         *s, *p;
 199  185          int             iPath;
 200  186          int             n;
 201  187  
 202      -        if (!sh) {
      188 +        if (sh == NULL)
 203  189                  return (ENOENT);
 204      -        }
 205  190  
 206      -        /*
 207      -         * We need to find the hash buckets for the fstype.
 208      -         */
 209      -        rw_enter(&sharetab_lock, RW_WRITER);
 210      -        for (sht = sharefs_sharetab; sht != NULL; sht = sht->s_next) {
 211      -                if (strcmp(sh->sh_fstype, sht->s_fstype) == 0) {
      191 +        /* We need to find the hash buckets for the fstype */
      192 +        rw_enter(&sg->sharetab_lock, RW_WRITER);
      193 +        for (sht = sg->sharefs_sharetab; sht != NULL; sht = sht->s_next) {
      194 +                if (strcmp(sh->sh_fstype, sht->s_fstype) == 0)
 212  195                          break;
 213      -                }
 214  196          }
 215  197  
 216      -        /*
 217      -         * Did not exist, so allocate one and add it to the
 218      -         * sharetab.
 219      -         */
 220      -        if (!sht) {
      198 +        /* Did not exist, so allocate one and add it to the sharetab */
      199 +        if (sht == NULL) {
 221  200                  sht = kmem_zalloc(sizeof (*sht), KM_SLEEP);
 222  201                  n = strlen(sh->sh_fstype);
 223  202                  sht->s_fstype = kmem_zalloc(n + 1, KM_SLEEP);
 224  203                  (void) strncpy(sht->s_fstype, sh->sh_fstype, n);
 225  204  
 226      -                sht->s_next = sharefs_sharetab;
 227      -                sharefs_sharetab = sht;
      205 +                sht->s_next = sg->sharefs_sharetab;
      206 +                sg->sharefs_sharetab = sht;
 228  207          }
 229  208  
 230      -        /*
 231      -         * Now we need to find where we have to add the entry.
 232      -         */
      209 +        /* Now we need to find where we have to add the entry */
      210 +        iPath = shl != NULL ? shl->shl_path : strlen(sh->sh_path);
 233  211          iHash = pkp_tab_hash(sh->sh_path, strlen(sh->sh_path));
 234  212  
 235      -        iPath = shl ? shl->shl_path : strlen(sh->sh_path);
 236      -
 237  213          if (shl) {
 238  214                  sh->sh_size = shl->shl_path + shl->shl_res +
 239  215                      shl->shl_fstype + shl->shl_opts + shl->shl_descr;
 240  216          } else {
 241  217                  sh->sh_size = strlen(sh->sh_path) +
 242  218                      strlen(sh->sh_res) + strlen(sh->sh_fstype) +
 243  219                      strlen(sh->sh_opts) + strlen(sh->sh_descr);
 244  220          }
 245  221  
 246      -        /*
 247      -         * We need to account for field seperators and
 248      -         * the EOL.
 249      -         */
      222 +        /* We need to account for field separators and the EOL */
 250  223          sh->sh_size += 5;
 251  224  
 252      -        /*
 253      -         * Now walk down the hash table and add the new entry!
 254      -         */
      225 +        /* Now walk down the hash table and add the new entry */
 255  226          for (p = NULL, s = sht->s_buckets[iHash].ssh_sh;
 256  227              s != NULL; s = s->sh_next) {
 257  228                  /*
 258  229                   * We need exact matches.
 259  230                   *
 260  231                   * We found a matching path. Either we have a
 261  232                   * duplicate path in a share command or we are
 262  233                   * being asked to replace an existing entry.
 263  234                   */
 264  235                  if (strcmp(sh->sh_path, s->sh_path) == 0 &&
 265  236                      strlen(s->sh_path) == iPath) {
 266      -                        if (p) {
      237 +                        if (p != NULL)
 267  238                                  p->sh_next = sh;
 268      -                        } else {
      239 +                        else
 269  240                                  sht->s_buckets[iHash].ssh_sh = sh;
 270      -                        }
 271  241  
 272  242                          sh->sh_next = s->sh_next;
 273  243  
 274      -                        ASSERT(sharetab_size >= s->sh_size);
 275      -                        sharetab_size -= s->sh_size;
 276      -                        sharetab_size += sh->sh_size;
      244 +                        ASSERT(sg->sharetab_size >= s->sh_size);
      245 +                        sg->sharetab_size -= s->sh_size;
      246 +                        sg->sharetab_size += sh->sh_size;
 277  247  
 278      -                        /*
 279      -                         * Get rid of the old node.
 280      -                         */
      248 +                        /* Get rid of the old node */
 281  249                          sharefree(s, NULL);
 282  250  
 283      -                        gethrestime(&sharetab_mtime);
 284      -                        atomic_inc_32(&sharetab_generation);
      251 +                        gethrestime(&sg->sharetab_mtime);
      252 +                        atomic_inc_32(&sg->sharetab_generation);
 285  253  
 286  254                          ASSERT(sht->s_buckets[iHash].ssh_count != 0);
 287      -                        rw_exit(&sharetab_lock);
      255 +                        rw_exit(&sg->sharetab_lock);
 288  256  
 289  257                          return (0);
 290  258                  }
 291  259  
 292  260                  p = s;
 293  261          }
 294  262  
 295  263          /*
 296  264           * Okay, we have gone through the entire hash chain and not
 297  265           * found a match. We just need to add this node.
 298  266           */
 299  267          sh->sh_next = sht->s_buckets[iHash].ssh_sh;
 300  268          sht->s_buckets[iHash].ssh_sh = sh;
 301  269          atomic_inc_32(&sht->s_buckets[iHash].ssh_count);
 302  270          atomic_inc_32(&sht->s_count);
 303      -        atomic_inc_32(&sharetab_count);
 304      -        sharetab_size += sh->sh_size;
      271 +        atomic_inc_32(&sg->sharetab_count);
      272 +        sg->sharetab_size += sh->sh_size;
 305  273  
 306      -        gethrestime(&sharetab_mtime);
 307      -        atomic_inc_32(&sharetab_generation);
      274 +        gethrestime(&sg->sharetab_mtime);
      275 +        atomic_inc_32(&sg->sharetab_generation);
 308  276  
 309      -        rw_exit(&sharetab_lock);
      277 +        rw_exit(&sg->sharetab_lock);
 310  278  
 311  279          return (0);
 312  280  }
 313  281  
      282 +/* ARGSUSED */
      283 +static void *
      284 +sharetab_zone_init(zoneid_t zoneid)
      285 +{
      286 +        sharetab_globals_t *sg;
      287 +
      288 +        sg = kmem_zalloc(sizeof (*sg), KM_SLEEP);
      289 +
      290 +        rw_init(&sg->sharetab_lock, NULL, RW_DEFAULT, NULL);
      291 +        rw_init(&sg->sharefs_lock, NULL, RW_DEFAULT, NULL);
      292 +
      293 +        sg->sharetab_size = 0;
      294 +        sg->sharetab_count = 0;
      295 +        sg->sharetab_generation = 1;
      296 +
      297 +        gethrestime(&sg->sharetab_mtime);
      298 +        gethrestime(&sg->sharetab_snap_time);
      299 +
      300 +        return (sg);
      301 +}
      302 +
      303 +/* ARGSUSED */
      304 +static void
      305 +sharetab_zone_fini(zoneid_t zoneid, void *data)
      306 +{
      307 +        sharetab_globals_t *sg = data;
      308 +
      309 +        rw_destroy(&sg->sharefs_lock);
      310 +        rw_destroy(&sg->sharetab_lock);
      311 +
      312 +        kmem_free(sg, sizeof (*sg));
      313 +}
      314 +
 314  315  void
 315  316  sharefs_sharetab_init(void)
 316  317  {
 317      -        rw_init(&sharetab_lock, NULL, RW_DEFAULT, NULL);
 318      -        rw_init(&sharefs_lock, NULL, RW_DEFAULT, NULL);
      318 +        zone_key_create(&sharetab_zone_key, sharetab_zone_init,
      319 +            NULL, sharetab_zone_fini);
      320 +}
 319  321  
 320      -        sharetab_size = 0;
 321      -        sharetab_count = 0;
 322      -        sharetab_generation = 1;
 323      -
 324      -        gethrestime(&sharetab_mtime);
 325      -        gethrestime(&sharetab_snap_time);
      322 +sharetab_globals_t *
      323 +sharetab_get_globals(zone_t *zone)
      324 +{
      325 +        return (zone_getspecific(sharetab_zone_key, zone));
 326  326  }
 327  327  
 328  328  int
 329  329  sharefs_impl(enum sharefs_sys_op opcode, share_t *sh_in, uint32_t iMaxLen)
 330  330  {
 331  331          int             error = 0;
 332  332          size_t          len;
 333  333          size_t          bufsz;
 334  334          share_t         *sh;
 335      -
 336  335          sharefs_lens_t  shl;
 337      -
 338  336          model_t         model;
 339      -
 340  337          char            *buf = NULL;
      338 +        sharetab_globals_t *sg = sharetab_get_globals(curzone);
 341  339  
 342  340          STRUCT_DECL(share, u_sh);
 343  341  
 344  342          bufsz = iMaxLen;
 345  343  
 346  344          /*
 347  345           * Before we do anything, let's make sure we have
 348  346           * a sharetab in memory if we need one.
 349  347           */
 350      -        rw_enter(&sharetab_lock, RW_READER);
      348 +        rw_enter(&sg->sharetab_lock, RW_READER);
 351  349          switch (opcode) {
 352      -        case (SHAREFS_REMOVE) :
 353      -        case (SHAREFS_REPLACE) :
 354      -                if (!sharefs_sharetab) {
 355      -                        rw_exit(&sharetab_lock);
      350 +        case SHAREFS_REMOVE:
      351 +        case SHAREFS_REPLACE:
      352 +                if (!sg->sharefs_sharetab) {
      353 +                        rw_exit(&sg->sharetab_lock);
 356  354                          return (set_errno(ENOENT));
 357  355                  }
 358  356                  break;
 359      -        case (SHAREFS_ADD) :
 360      -        default :
      357 +        case SHAREFS_ADD:
      358 +        default:
 361  359                  break;
 362  360          }
 363      -        rw_exit(&sharetab_lock);
      361 +        rw_exit(&sg->sharetab_lock);
 364  362  
 365  363          model = get_udatamodel();
 366  364  
 367  365          /*
 368  366           * Initialize the data pointers.
 369  367           */
 370  368          STRUCT_INIT(u_sh, model);
 371      -        if (copyin(sh_in, STRUCT_BUF(u_sh), STRUCT_SIZE(u_sh))) {
      369 +        if (copyin(sh_in, STRUCT_BUF(u_sh), STRUCT_SIZE(u_sh)))
 372  370                  return (set_errno(EFAULT));
 373      -        }
 374  371  
 375      -        /*
 376      -         * Get the share.
 377      -         */
      372 +        /* Get the share */
 378  373          sh = kmem_zalloc(sizeof (share_t), KM_SLEEP);
 379  374  
 380      -        /*
 381      -         * Get some storage for copying in the strings.
 382      -         */
      375 +        /* Get some storage for copying in the strings */
 383  376          buf = kmem_zalloc(bufsz + 1, KM_SLEEP);
 384  377          bzero(&shl, sizeof (sharefs_lens_t));
 385  378  
 386      -        /*
 387      -         * Only grab these two until we know what we want.
 388      -         */
      379 +        /* Only grab these two until we know what we want */
 389  380          SHARETAB_COPYIN(path);
 390  381          SHARETAB_COPYIN(fstype);
 391  382  
 392  383          switch (opcode) {
 393      -        case (SHAREFS_ADD) :
 394      -        case (SHAREFS_REPLACE) :
      384 +        case SHAREFS_ADD:
      385 +        case SHAREFS_REPLACE:
 395  386                  SHARETAB_COPYIN(res);
 396  387                  SHARETAB_COPYIN(opts);
 397  388                  SHARETAB_COPYIN(descr);
 398      -
 399      -                error = sharefs_add(sh, &shl);
      389 +                error = sharefs_add(sg, sh, &shl);
 400  390                  break;
 401      -
 402      -        case (SHAREFS_REMOVE) :
 403      -
 404      -                error = sharefs_remove(sh, &shl);
      391 +        case SHAREFS_REMOVE:
      392 +                error = sharefs_remove(sg, sh, &shl);
 405  393                  break;
 406      -
 407  394          default:
 408  395                  error = EINVAL;
 409  396                  break;
 410  397          }
 411  398  
 412  399  cleanup:
 413      -
 414  400          /*
 415  401           * If there is no error, then we have stashed the structure
 416  402           * away in the sharetab hash table or have deleted it.
 417  403           *
 418  404           * Either way, the only reason to blow away the data is if
 419  405           * there was an error.
 420  406           */
 421      -        if (error != 0) {
      407 +        if (error != 0)
 422  408                  sharefree(sh, &shl);
 423      -        }
 424  409  
 425      -        if (buf) {
      410 +        if (buf != NULL)
 426  411                  kmem_free(buf, bufsz + 1);
 427      -        }
 428  412  
 429      -        return ((error != 0) ? set_errno(error) : 0);
      413 +        return (error != 0 ? set_errno(error) : 0);
 430  414  }
 431  415  
 432  416  int
 433  417  sharefs(enum sharefs_sys_op opcode, share_t *sh_in, uint32_t iMaxLen)
 434  418  {
 435      -        if (secpolicy_sys_config(CRED(), B_FALSE) != 0)
 436      -                return (set_errno(EPERM));
      419 +        /*
      420 +         * If we're in the global zone, PRIV_SYS_CONFIG gives us the
      421 +         * privileges needed to act on sharetab. However, if we're in
      422 +         * a non-global zone, PRIV_SYS_CONFIG is not allowed. To work
      423 +         * around this issue, PRIV_SYS_NFS is used in this case.
      424 +         *
      425 +         * TODO: This basically overloads the definition/use of
      426 +         * PRIV_SYS_NFS to work around the limitation of PRIV_SYS_CONFIG
      427 +         * in a zone. Solaris 11 solved this by implementing PRIV_SYS_SHARE;
      428 +         * we should do the same and replace the use of PRIV_SYS_NFS here and
      429 +         * in zfs_secpolicy_share().
      430 +         */
      431 +        if (INGLOBALZONE(curproc)) {
      432 +                if (secpolicy_sys_config(CRED(), B_FALSE) != 0)
      433 +                        return (set_errno(EPERM));
      434 +        } else {
      435 +                /* behave like zfs_secpolicy_share() */
      436 +                if (secpolicy_nfs(CRED()) != 0)
      437 +                        return (set_errno(EPERM));
 437  438  
      439 +        }
 438  440          return (sharefs_impl(opcode, sh_in, iMaxLen));
 439  441  }
    
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX