Print this page
    
NEX-15279 support NFS server in zone
NEX-15520 online NFS shares cause zoneadm halt to hang in nfs_export_zone_fini
Portions contributed by: Dan Kruchinin dan.kruchinin@nexenta.com
Portions contributed by: Stepan Zastupov stepan.zastupov@gmail.com
Reviewed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/sys/zone.h
          +++ new/usr/src/uts/common/sys/zone.h
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  
    | 
      ↓ open down ↓ | 
    10 lines elided | 
    
      ↑ open up ↑ | 
  
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
       21 +
  21   22  /*
  22   23   * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
       24 + */
       25 +
       26 +/*
  23   27   * Copyright 2015 Joyent, Inc. All rights reserved.
  24      - * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
       28 + * Copyright 2018 Nexenta Systems, Inc.
  25   29   * Copyright 2014 Igor Kozhukhov <ikozhukhov@gmail.com>.
  26   30   */
  27   31  
  28   32  #ifndef _SYS_ZONE_H
  29   33  #define _SYS_ZONE_H
  30   34  
  31   35  #include <sys/types.h>
  32   36  #include <sys/mutex.h>
  33   37  #include <sys/param.h>
  34   38  #include <sys/rctl.h>
  35   39  #include <sys/ipc_rctl.h>
  36   40  #include <sys/pset.h>
  37   41  #include <sys/tsol/label.h>
  38   42  #include <sys/cred.h>
  39   43  #include <sys/netstack.h>
  40   44  #include <sys/uadmin.h>
  41   45  #include <sys/ksynch.h>
  42   46  #include <sys/socket_impl.h>
  43   47  #include <sys/secflags.h>
  44   48  #include <netinet/in.h>
  45   49  
  46   50  #ifdef  __cplusplus
  47   51  extern "C" {
  48   52  #endif
  49   53  
  50   54  /*
  51   55   * NOTE
  52   56   *
  53   57   * The contents of this file are private to the implementation of
  54   58   * Solaris and are subject to change at any time without notice.
  55   59   * Applications and drivers using these interfaces may fail to
  56   60   * run on future releases.
  57   61   */
  58   62  
  59   63  /* Available both in kernel and for user space */
  60   64  
  61   65  /* zone id restrictions and special ids */
  62   66  #define MAX_ZONEID      9999
  63   67  #define MIN_USERZONEID  1       /* lowest user-creatable zone ID */
  64   68  #define MIN_ZONEID      0       /* minimum zone ID on system */
  65   69  #define GLOBAL_ZONEID   0
  66   70  #define ZONEID_WIDTH    4       /* for printf */
  67   71  
  68   72  /*
  69   73   * Special zoneid_t token to refer to all zones.
  70   74   */
  71   75  #define ALL_ZONES       (-1)
  72   76  
  73   77  /* system call subcodes */
  74   78  #define ZONE_CREATE             0
  75   79  #define ZONE_DESTROY            1
  76   80  #define ZONE_GETATTR            2
  77   81  #define ZONE_ENTER              3
  78   82  #define ZONE_LIST               4
  79   83  #define ZONE_SHUTDOWN           5
  80   84  #define ZONE_LOOKUP             6
  81   85  #define ZONE_BOOT               7
  82   86  #define ZONE_VERSION            8
  83   87  #define ZONE_SETATTR            9
  84   88  #define ZONE_ADD_DATALINK       10
  85   89  #define ZONE_DEL_DATALINK       11
  86   90  #define ZONE_CHECK_DATALINK     12
  87   91  #define ZONE_LIST_DATALINK      13
  88   92  
  89   93  /* zone attributes */
  90   94  #define ZONE_ATTR_ROOT          1
  91   95  #define ZONE_ATTR_NAME          2
  92   96  #define ZONE_ATTR_STATUS        3
  93   97  #define ZONE_ATTR_PRIVSET       4
  94   98  #define ZONE_ATTR_UNIQID        5
  95   99  #define ZONE_ATTR_POOLID        6
  96  100  #define ZONE_ATTR_INITPID       7
  97  101  #define ZONE_ATTR_SLBL          8
  98  102  #define ZONE_ATTR_INITNAME      9
  99  103  #define ZONE_ATTR_BOOTARGS      10
 100  104  #define ZONE_ATTR_BRAND         11
 101  105  #define ZONE_ATTR_PHYS_MCAP     12
 102  106  #define ZONE_ATTR_SCHED_CLASS   13
 103  107  #define ZONE_ATTR_FLAGS         14
 104  108  #define ZONE_ATTR_HOSTID        15
 105  109  #define ZONE_ATTR_FS_ALLOWED    16
 106  110  #define ZONE_ATTR_NETWORK       17
 107  111  #define ZONE_ATTR_INITNORESTART 20
 108  112  #define ZONE_ATTR_SECFLAGS      21
 109  113  
 110  114  /* Start of the brand-specific attribute namespace */
 111  115  #define ZONE_ATTR_BRAND_ATTRS   32768
 112  116  
 113  117  #define ZONE_FS_ALLOWED_MAX     1024
 114  118  
 115  119  #define ZONE_EVENT_CHANNEL      "com.sun:zones:status"
 116  120  #define ZONE_EVENT_STATUS_CLASS "status"
 117  121  #define ZONE_EVENT_STATUS_SUBCLASS      "change"
 118  122  
 119  123  #define ZONE_EVENT_UNINITIALIZED        "uninitialized"
 120  124  #define ZONE_EVENT_INITIALIZED          "initialized"
 121  125  #define ZONE_EVENT_READY                "ready"
 122  126  #define ZONE_EVENT_RUNNING              "running"
 123  127  #define ZONE_EVENT_SHUTTING_DOWN        "shutting_down"
 124  128  
 125  129  #define ZONE_CB_NAME            "zonename"
 126  130  #define ZONE_CB_NEWSTATE        "newstate"
 127  131  #define ZONE_CB_OLDSTATE        "oldstate"
 128  132  #define ZONE_CB_TIMESTAMP       "when"
 129  133  #define ZONE_CB_ZONEID          "zoneid"
 130  134  
 131  135  /*
 132  136   * Exit values that may be returned by scripts or programs invoked by various
 133  137   * zone commands.
 134  138   *
 135  139   * These are defined as:
 136  140   *
 137  141   *      ZONE_SUBPROC_OK
 138  142   *      ===============
 139  143   *      The subprocess completed successfully.
 140  144   *
 141  145   *      ZONE_SUBPROC_USAGE
 142  146   *      ==================
 143  147   *      The subprocess failed with a usage message, or a usage message should
 144  148   *      be output on its behalf.
 145  149   *
 146  150   *      ZONE_SUBPROC_NOTCOMPLETE
 147  151   *      ========================
 148  152   *      The subprocess did not complete, but the actions performed by the
 149  153   *      subprocess require no recovery actions by the user.
 150  154   *
 151  155   *      For example, if the subprocess were called by "zoneadm install," the
 152  156   *      installation of the zone did not succeed but the user need not perform
 153  157   *      a "zoneadm uninstall" before attempting another install.
 154  158   *
 155  159   *      ZONE_SUBPROC_FATAL
 156  160   *      ==================
 157  161   *      The subprocess failed in a fatal manner, usually one that will require
 158  162   *      some type of recovery action by the user.
 159  163   *
 160  164   *      For example, if the subprocess were called by "zoneadm install," the
 161  165   *      installation of the zone did not succeed and the user will need to
 162  166   *      perform a "zoneadm uninstall" before another install attempt is
 163  167   *      possible.
 164  168   *
 165  169   *      The non-success exit values are large to avoid accidental collision
 166  170   *      with values used internally by some commands (e.g. "Z_ERR" and
 167  171   *      "Z_USAGE" as used by zoneadm.)
 168  172   */
 169  173  #define ZONE_SUBPROC_OK                 0
 170  174  #define ZONE_SUBPROC_USAGE              253
 171  175  #define ZONE_SUBPROC_NOTCOMPLETE        254
 172  176  #define ZONE_SUBPROC_FATAL              255
 173  177  
 174  178  #ifdef _SYSCALL32
 175  179  typedef struct {
 176  180          caddr32_t zone_name;
 177  181          caddr32_t zone_root;
 178  182          caddr32_t zone_privs;
 179  183          size32_t zone_privssz;
 180  184          caddr32_t rctlbuf;
 181  185          size32_t rctlbufsz;
 182  186          caddr32_t extended_error;
 183  187          caddr32_t zfsbuf;
 184  188          size32_t  zfsbufsz;
 185  189          int match;                      /* match level */
 186  190          uint32_t doi;                   /* DOI for label */
 187  191          caddr32_t label;                /* label associated with zone */
 188  192          int flags;
 189  193  } zone_def32;
 190  194  #endif
 191  195  typedef struct {
 192  196          const char *zone_name;
 193  197          const char *zone_root;
 194  198          const struct priv_set *zone_privs;
 195  199          size_t zone_privssz;
 196  200          const char *rctlbuf;
 197  201          size_t rctlbufsz;
 198  202          int *extended_error;
 199  203          const char *zfsbuf;
 200  204          size_t zfsbufsz;
 201  205          int match;                      /* match level */
 202  206          uint32_t doi;                   /* DOI for label */
 203  207          const bslabel_t *label;         /* label associated with zone */
 204  208          int flags;
 205  209  } zone_def;
 206  210  
 207  211  /* extended error information */
 208  212  #define ZE_UNKNOWN      0       /* No extended error info */
 209  213  #define ZE_CHROOTED     1       /* tried to zone_create from chroot */
 210  214  #define ZE_AREMOUNTS    2       /* there are mounts within the zone */
 211  215  #define ZE_LABELINUSE   3       /* label is already in use by some other zone */
 212  216  
 213  217  /*
 214  218   * zone_status values
 215  219   *
 216  220   * You must modify zone_status_names in mdb(1M)'s genunix module
 217  221   * (genunix/zone.c) when you modify this enum.
 218  222   */
 219  223  typedef enum {
 220  224          ZONE_IS_UNINITIALIZED = 0,
 221  225          ZONE_IS_INITIALIZED,
 222  226          ZONE_IS_READY,
 223  227          ZONE_IS_BOOTING,
 224  228          ZONE_IS_RUNNING,
 225  229          ZONE_IS_SHUTTING_DOWN,
 226  230          ZONE_IS_EMPTY,
 227  231          ZONE_IS_DOWN,
 228  232          ZONE_IS_DYING,
 229  233          ZONE_IS_DEAD
 230  234  } zone_status_t;
 231  235  #define ZONE_MIN_STATE          ZONE_IS_UNINITIALIZED
 232  236  #define ZONE_MAX_STATE          ZONE_IS_DEAD
 233  237  
 234  238  /*
 235  239   * Valid commands which may be issued by zoneadm to zoneadmd.  The kernel also
 236  240   * communicates with zoneadmd, but only uses Z_REBOOT and Z_HALT.
 237  241   */
 238  242  typedef enum zone_cmd {
 239  243          Z_READY, Z_BOOT, Z_FORCEBOOT, Z_REBOOT, Z_HALT, Z_NOTE_UNINSTALLING,
 240  244          Z_MOUNT, Z_FORCEMOUNT, Z_UNMOUNT, Z_SHUTDOWN
 241  245  } zone_cmd_t;
 242  246  
 243  247  /*
 244  248   * The structure of a request to zoneadmd.
 245  249   */
 246  250  typedef struct zone_cmd_arg {
 247  251          uint64_t        uniqid;         /* unique "generation number" */
 248  252          zone_cmd_t      cmd;            /* requested action */
 249  253          uint32_t        _pad;           /* need consistent 32/64 bit alignmt */
 250  254          char locale[MAXPATHLEN];        /* locale in which to render messages */
 251  255          char bootbuf[BOOTARGS_MAX];     /* arguments passed to zone_boot() */
 252  256  } zone_cmd_arg_t;
 253  257  
 254  258  /*
 255  259   * Structure of zoneadmd's response to a request.  A NULL return value means
 256  260   * the caller should attempt to restart zoneadmd and retry.
 257  261   */
 258  262  typedef struct zone_cmd_rval {
 259  263          int rval;                       /* return value of request */
 260  264          char errbuf[1]; /* variable-sized buffer containing error messages */
 261  265  } zone_cmd_rval_t;
 262  266  
 263  267  /*
 264  268   * The zone support infrastructure uses the zone name as a component
 265  269   * of unix domain (AF_UNIX) sockets, which are limited to 108 characters
 266  270   * in length, so ZONENAME_MAX is limited by that.
 267  271   */
 268  272  #define ZONENAME_MAX            64
 269  273  
 270  274  #define GLOBAL_ZONENAME         "global"
 271  275  
 272  276  /*
 273  277   * Extended Regular expression (see regex(5)) which matches all valid zone
 274  278   * names.
 275  279   */
 276  280  #define ZONENAME_REGEXP         "[a-zA-Z0-9][-_.a-zA-Z0-9]{0,62}"
 277  281  
 278  282  /*
 279  283   * Where the zones support infrastructure places temporary files.
 280  284   */
 281  285  #define ZONES_TMPDIR            "/var/run/zones"
 282  286  
 283  287  /*
 284  288   * The path to the door used by clients to communicate with zoneadmd.
 285  289   */
 286  290  #define ZONE_DOOR_PATH          ZONES_TMPDIR "/%s.zoneadmd_door"
 287  291  
 288  292  
 289  293  /* zone_flags */
 290  294  /*
 291  295   * Threads that read or write the following flag must hold zone_lock.
 292  296   */
 293  297  #define ZF_REFCOUNTS_LOGGED     0x1     /* a thread logged the zone's refs */
 294  298  
 295  299  /*
 296  300   * The following flags are set when the zone is created and never changed.
 297  301   * Threads that test for these flags don't have to hold zone_lock.
 298  302   */
 299  303  #define ZF_HASHED_LABEL         0x2     /* zone has a unique label */
 300  304  #define ZF_IS_SCRATCH           0x4     /* scratch zone */
 301  305  #define ZF_NET_EXCL             0x8     /* Zone has an exclusive IP stack */
 302  306  
 303  307  
 304  308  /* zone_create flags */
 305  309  #define ZCF_NET_EXCL            0x1     /* Create a zone with exclusive IP */
 306  310  
 307  311  /* zone network properties */
 308  312  #define ZONE_NETWORK_ADDRESS    1
 309  313  #define ZONE_NETWORK_DEFROUTER  2
 310  314  
 311  315  #define ZONE_NET_ADDRNAME       "address"
 312  316  #define ZONE_NET_RTRNAME        "route"
 313  317  
 314  318  typedef struct zone_net_data {
 315  319          int zn_type;
 316  320          int zn_len;
 317  321          datalink_id_t zn_linkid;
 318  322          uint8_t zn_val[1];
 319  323  } zone_net_data_t;
 320  324  
 321  325  
 322  326  #ifdef _KERNEL
 323  327  
 324  328  /*
 325  329   * We need to protect the definition of 'list_t' from userland applications and
 326  330   * libraries which may be defining their own versions.
 327  331   */
 328  332  #include <sys/list.h>
 329  333  #include <sys/loadavg.h>
 330  334  
 331  335  #define GLOBAL_ZONEUNIQID       0       /* uniqid of the global zone */
 332  336  
 333  337  struct pool;
 334  338  struct brand;
 335  339  
 336  340  /*
 337  341   * Each of these constants identifies a kernel subsystem that acquires and
 338  342   * releases zone references.  Each subsystem that invokes
 339  343   * zone_hold_ref() and zone_rele_ref() should specify the
 340  344   * zone_ref_subsys_t constant associated with the subsystem.  Tracked holds
 341  345   * help users and developers quickly identify subsystems that stall zone
 342  346   * shutdowns indefinitely.
 343  347   *
 344  348   * NOTE: You must modify zone_ref_subsys_names in usr/src/uts/common/os/zone.c
 345  349   * when you modify this enumeration.
 346  350   */
 347  351  typedef enum zone_ref_subsys {
 348  352          ZONE_REF_NFS,                   /* NFS */
 349  353          ZONE_REF_NFSV4,                 /* NFSv4 */
 350  354          ZONE_REF_SMBFS,                 /* SMBFS */
 351  355          ZONE_REF_MNTFS,                 /* MNTFS */
 352  356          ZONE_REF_LOFI,                  /* LOFI devices */
 353  357          ZONE_REF_VFS,                   /* VFS infrastructure */
 354  358          ZONE_REF_IPC,                   /* IPC infrastructure */
 355  359          ZONE_REF_NUM_SUBSYS             /* This must be the last entry. */
 356  360  } zone_ref_subsys_t;
 357  361  
 358  362  /*
 359  363   * zone_ref represents a general-purpose reference to a zone.  Each zone's
 360  364   * references are linked into the zone's zone_t::zone_ref_list.  This allows
 361  365   * debuggers to walk zones' references.
 362  366   */
 363  367  typedef struct zone_ref {
 364  368          struct zone     *zref_zone; /* the zone to which the reference refers */
 365  369          list_node_t     zref_linkage; /* linkage for zone_t::zone_ref_list */
 366  370  } zone_ref_t;
 367  371  
 368  372  /*
 369  373   * Structure to record list of ZFS datasets exported to a zone.
 370  374   */
 371  375  typedef struct zone_dataset {
 372  376          char            *zd_dataset;
 373  377          list_node_t     zd_linkage;
 374  378  } zone_dataset_t;
 375  379  
 376  380  /*
 377  381   * structure for zone kstats
 378  382   */
 379  383  typedef struct zone_kstat {
 380  384          kstat_named_t zk_zonename;
 381  385          kstat_named_t zk_usage;
 382  386          kstat_named_t zk_value;
 383  387  } zone_kstat_t;
 384  388  
 385  389  struct cpucap;
 386  390  
 387  391  typedef struct {
 388  392          kstat_named_t   zm_zonename;
 389  393          kstat_named_t   zm_pgpgin;
 390  394          kstat_named_t   zm_anonpgin;
 391  395          kstat_named_t   zm_execpgin;
 392  396          kstat_named_t   zm_fspgin;
 393  397          kstat_named_t   zm_anon_alloc_fail;
 394  398  } zone_mcap_kstat_t;
 395  399  
 396  400  typedef struct {
 397  401          kstat_named_t   zm_zonename;    /* full name, kstat truncates name */
 398  402          kstat_named_t   zm_utime;
 399  403          kstat_named_t   zm_stime;
 400  404          kstat_named_t   zm_wtime;
 401  405          kstat_named_t   zm_avenrun1;
 402  406          kstat_named_t   zm_avenrun5;
 403  407          kstat_named_t   zm_avenrun15;
 404  408          kstat_named_t   zm_ffcap;
 405  409          kstat_named_t   zm_ffnoproc;
 406  410          kstat_named_t   zm_ffnomem;
 407  411          kstat_named_t   zm_ffmisc;
 408  412          kstat_named_t   zm_nested_intp;
 409  413          kstat_named_t   zm_init_pid;
 410  414          kstat_named_t   zm_boot_time;
 411  415  } zone_misc_kstat_t;
 412  416  
 413  417  typedef struct zone {
 414  418          /*
 415  419           * zone_name is never modified once set.
 416  420           */
 417  421          char            *zone_name;     /* zone's configuration name */
 418  422          /*
 419  423           * zone_nodename and zone_domain are never freed once allocated.
 420  424           */
 421  425          char            *zone_nodename; /* utsname.nodename equivalent */
 422  426          char            *zone_domain;   /* srpc_domain equivalent */
 423  427          /*
 424  428           * zone_hostid is used for per-zone hostid emulation.
 425  429           * Currently it isn't modified after it's set (so no locks protect
 426  430           * accesses), but that might have to change when we allow
 427  431           * administrators to change running zones' properties.
 428  432           *
 429  433           * The global zone's zone_hostid must always be HW_INVALID_HOSTID so
 430  434           * that zone_get_hostid() will function correctly.
 431  435           */
 432  436          uint32_t        zone_hostid;    /* zone's hostid, HW_INVALID_HOSTID */
 433  437                                          /* if not emulated */
 434  438          /*
 435  439           * zone_lock protects the following fields of a zone_t:
 436  440           *      zone_ref
 437  441           *      zone_cred_ref
 438  442           *      zone_subsys_ref
 439  443           *      zone_ref_list
 440  444           *      zone_ntasks
 441  445           *      zone_flags
 442  446           *      zone_zsd
 443  447           *      zone_pfexecd
 444  448           */
 445  449          kmutex_t        zone_lock;
 446  450          /*
 447  451           * zone_linkage is the zone's linkage into the active or
 448  452           * death-row list.  The field is protected by zonehash_lock.
 449  453           */
 450  454          list_node_t     zone_linkage;
 451  455          zoneid_t        zone_id;        /* ID of zone */
 452  456          uint_t          zone_ref;       /* count of zone_hold()s on zone */
 453  457          uint_t          zone_cred_ref;  /* count of zone_hold_cred()s on zone */
 454  458          /*
 455  459           * Fixed-sized array of subsystem-specific reference counts
 456  460           * The sum of all of the counts must be less than or equal to zone_ref.
 457  461           * The array is indexed by the counts' subsystems' zone_ref_subsys_t
 458  462           * constants.
 459  463           */
 460  464          uint_t          zone_subsys_ref[ZONE_REF_NUM_SUBSYS];
 461  465          list_t          zone_ref_list;  /* list of zone_ref_t structs */
 462  466          /*
 463  467           * zone_rootvp and zone_rootpath can never be modified once set.
 464  468           */
 465  469          struct vnode    *zone_rootvp;   /* zone's root vnode */
 466  470          char            *zone_rootpath; /* Path to zone's root + '/' */
 467  471          ushort_t        zone_flags;     /* misc flags */
 468  472          zone_status_t   zone_status;    /* protected by zone_status_lock */
 469  473          uint_t          zone_ntasks;    /* number of tasks executing in zone */
 470  474          kmutex_t        zone_nlwps_lock; /* protects zone_nlwps, and *_nlwps */
 471  475                                          /* counters in projects and tasks */
 472  476                                          /* that are within the zone */
 473  477          rctl_qty_t      zone_nlwps;     /* number of lwps in zone */
 474  478          rctl_qty_t      zone_nlwps_ctl; /* protected by zone_rctls->rcs_lock */
 475  479          rctl_qty_t      zone_shmmax;    /* System V shared memory usage */
 476  480          ipc_rqty_t      zone_ipc;       /* System V IPC id resource usage */
 477  481  
 478  482          uint_t          zone_rootpathlen; /* strlen(zone_rootpath) + 1 */
 479  483          uint32_t        zone_shares;    /* FSS shares allocated to zone */
 480  484          rctl_set_t      *zone_rctls;    /* zone-wide (zone.*) rctls */
 481  485          kmutex_t        zone_mem_lock;  /* protects zone_locked_mem and */
 482  486                                          /* kpd_locked_mem for all */
 483  487                                          /* projects in zone. */
 484  488                                          /* Also protects zone_max_swap */
 485  489                                          /* grab after p_lock, before rcs_lock */
 486  490          rctl_qty_t      zone_locked_mem;        /* bytes of locked memory in */
 487  491                                                  /* zone */
 488  492          rctl_qty_t      zone_locked_mem_ctl;    /* Current locked memory */
 489  493                                                  /* limit.  Protected by */
 490  494                                                  /* zone_rctls->rcs_lock */
 491  495          rctl_qty_t      zone_max_swap; /* bytes of swap reserved by zone */
 492  496          rctl_qty_t      zone_max_swap_ctl;      /* current swap limit. */
 493  497                                                  /* Protected by */
 494  498                                                  /* zone_rctls->rcs_lock */
 495  499          kmutex_t        zone_rctl_lock; /* protects zone_max_lofi */
 496  500          rctl_qty_t      zone_max_lofi; /* lofi devs for zone */
 497  501          rctl_qty_t      zone_max_lofi_ctl;      /* current lofi limit. */
 498  502                                                  /* Protected by */
 499  503                                                  /* zone_rctls->rcs_lock */
 500  504          list_t          zone_zsd;       /* list of Zone-Specific Data values */
 501  505          kcondvar_t      zone_cv;        /* used to signal state changes */
 502  506          struct proc     *zone_zsched;   /* Dummy kernel "zsched" process */
 503  507          pid_t           zone_proc_initpid; /* pid of "init" for this zone */
 504  508          char            *zone_initname; /* fs path to 'init' */
 505  509          int             zone_boot_err;  /* for zone_boot() if boot fails */
 506  510          char            *zone_bootargs; /* arguments passed via zone_boot() */
 507  511          uint64_t        zone_phys_mcap; /* physical memory cap */
 508  512          /*
 509  513           * zone_kthreads is protected by zone_status_lock.
 510  514           */
 511  515          kthread_t       *zone_kthreads; /* kernel threads in zone */
 512  516          struct priv_set *zone_privset;  /* limit set for zone */
 513  517          /*
 514  518           * zone_vfslist is protected by vfs_list_lock().
 515  519           */
 516  520          struct vfs      *zone_vfslist;  /* list of FS's mounted in zone */
 517  521          uint64_t        zone_uniqid;    /* unique zone generation number */
 518  522          struct cred     *zone_kcred;    /* kcred-like, zone-limited cred */
 519  523          /*
 520  524           * zone_pool is protected by pool_lock().
 521  525           */
 522  526          struct pool     *zone_pool;     /* pool the zone is bound to */
 523  527          hrtime_t        zone_pool_mod;  /* last pool bind modification time */
 524  528          /* zone_psetid is protected by cpu_lock */
 525  529          psetid_t        zone_psetid;    /* pset the zone is bound to */
 526  530  
 527  531          time_t          zone_boot_time; /* Similar to boot_time */
 528  532  
 529  533          /*
 530  534           * The following two can be read without holding any locks.  They are
 531  535           * updated under cpu_lock.
 532  536           */
 533  537          int             zone_ncpus;  /* zone's idea of ncpus */
 534  538          int             zone_ncpus_online; /* zone's idea of ncpus_online */
 535  539          /*
 536  540           * List of ZFS datasets exported to this zone.
 537  541           */
 538  542          list_t          zone_datasets;  /* list of datasets */
 539  543  
 540  544          ts_label_t      *zone_slabel;   /* zone sensitivity label */
 541  545          int             zone_match;     /* require label match for packets */
 542  546          tsol_mlp_list_t zone_mlps;      /* MLPs on zone-private addresses */
 543  547  
 544  548          boolean_t       zone_restart_init;      /* Restart init if it dies? */
 545  549          struct brand    *zone_brand;            /* zone's brand */
 546  550          void            *zone_brand_data;       /* store brand specific data */
 547  551          id_t            zone_defaultcid;        /* dflt scheduling class id */
 548  552          kstat_t         *zone_swapresv_kstat;
 549  553          kstat_t         *zone_lockedmem_kstat;
 550  554          /*
 551  555           * zone_dl_list is protected by zone_lock
 552  556           */
 553  557          list_t          zone_dl_list;
 554  558          netstack_t      *zone_netstack;
 555  559          struct cpucap   *zone_cpucap;   /* CPU caps data */
 556  560          /*
 557  561           * Solaris Auditing per-zone audit context
 558  562           */
 559  563          struct au_kcontext      *zone_audit_kctxt;
 560  564          /*
 561  565           * For private use by mntfs.
 562  566           */
 563  567          struct mntelem  *zone_mntfs_db;
 564  568          krwlock_t       zone_mntfs_db_lock;
 565  569  
 566  570          struct klpd_reg         *zone_pfexecd;
 567  571  
 568  572          char            *zone_fs_allowed;
 569  573          rctl_qty_t      zone_nprocs;    /* number of processes in the zone */
 570  574          rctl_qty_t      zone_nprocs_ctl;        /* current limit protected by */
 571  575                                                  /* zone_rctls->rcs_lock */
 572  576          kstat_t         *zone_nprocs_kstat;
 573  577  
 574  578          kmutex_t        zone_mcap_lock; /* protects mcap statistics */
 575  579          kstat_t         *zone_mcap_ksp;
 576  580          zone_mcap_kstat_t *zone_mcap_stats;
 577  581          uint64_t        zone_pgpgin;            /* pages paged in */
 578  582          uint64_t        zone_anonpgin;          /* anon pages paged in */
 579  583          uint64_t        zone_execpgin;          /* exec pages paged in */
 580  584          uint64_t        zone_fspgin;            /* fs pages paged in */
 581  585          uint64_t        zone_anon_alloc_fail;   /* cnt of anon alloc fails */
 582  586  
 583  587          psecflags_t     zone_secflags; /* default zone security-flags */
 584  588  
 585  589          /*
 586  590           * Misc. kstats and counters for zone cpu-usage aggregation.
 587  591           * The zone_Xtime values are the sum of the micro-state accounting
 588  592           * values for all threads that are running or have run in the zone.
 589  593           * This is tracked in msacct.c as threads change state.
 590  594           * The zone_stime is the sum of the LMS_SYSTEM times.
 591  595           * The zone_utime is the sum of the LMS_USER times.
 592  596           * The zone_wtime is the sum of the LMS_WAIT_CPU times.
 593  597           * As with per-thread micro-state accounting values, these values are
 594  598           * not scaled to nanosecs.  The scaling is done by the
 595  599           * zone_misc_kstat_update function when kstats are requested.
 596  600           */
 597  601          kmutex_t        zone_misc_lock;         /* protects misc statistics */
 598  602          kstat_t         *zone_misc_ksp;
 599  603          zone_misc_kstat_t *zone_misc_stats;
 600  604          uint64_t        zone_stime;             /* total system time */
 601  605          uint64_t        zone_utime;             /* total user time */
 602  606          uint64_t        zone_wtime;             /* total time waiting in runq */
 603  607          /* fork-fail kstat tracking */
 604  608          uint32_t        zone_ffcap;             /* hit an rctl cap */
 605  609          uint32_t        zone_ffnoproc;          /* get proc/lwp error */
 606  610          uint32_t        zone_ffnomem;           /* as_dup/memory error */
 607  611          uint32_t        zone_ffmisc;            /* misc. other error */
 608  612  
 609  613          uint32_t        zone_nested_intp;       /* nested interp. kstat */
 610  614  
 611  615          struct loadavg_s zone_loadavg;          /* loadavg for this zone */
 612  616          uint64_t        zone_hp_avenrun[3];     /* high-precision avenrun */
 613  617          int             zone_avenrun[3];        /* FSCALED avg. run queue len */
 614  618  
 615  619          /*
 616  620           * FSS stats updated once per second by fss_decay_usage.
 617  621           */
 618  622          uint32_t        zone_fss_gen;           /* FSS generation cntr */
 619  623          uint64_t        zone_run_ticks;         /* tot # of ticks running */
 620  624  
 621  625          /*
 622  626           * DTrace-private per-zone state
 623  627           */
 624  628          int             zone_dtrace_getf;       /* # of unprivileged getf()s */
 625  629  
 626  630          /*
 627  631           * Synchronization primitives used to synchronize between mounts and
 628  632           * zone creation/destruction.
 629  633           */
  
    | 
      ↓ open down ↓ | 
    595 lines elided | 
    
      ↑ open up ↑ | 
  
 630  634          int             zone_mounts_in_progress;
 631  635          kcondvar_t      zone_mount_cv;
 632  636          kmutex_t        zone_mount_lock;
 633  637  } zone_t;
 634  638  
 635  639  /*
 636  640   * Special value of zone_psetid to indicate that pools are disabled.
 637  641   */
 638  642  #define ZONE_PS_INVAL   PS_MYID
 639  643  
 640      -
 641  644  extern zone_t zone0;
 642  645  extern zone_t *global_zone;
 643  646  extern uint_t maxzones;
 644  647  extern rctl_hndl_t rc_zone_nlwps;
 645  648  extern rctl_hndl_t rc_zone_nprocs;
 646  649  
 647  650  extern long zone(int, void *, void *, void *, void *);
 648  651  extern void zone_zsd_init(void);
 649  652  extern void zone_init(void);
 650  653  extern void zone_hold(zone_t *);
 651  654  extern void zone_rele(zone_t *);
 652  655  extern void zone_init_ref(zone_ref_t *);
 653  656  extern void zone_hold_ref(zone_t *, zone_ref_t *, zone_ref_subsys_t);
 654  657  extern void zone_rele_ref(zone_ref_t *, zone_ref_subsys_t);
 655  658  extern void zone_cred_hold(zone_t *);
 656  659  extern void zone_cred_rele(zone_t *);
 657  660  extern void zone_task_hold(zone_t *);
 658  661  extern void zone_task_rele(zone_t *);
 659  662  extern zone_t *zone_find_by_id(zoneid_t);
 660  663  extern zone_t *zone_find_by_label(const ts_label_t *);
 661  664  extern zone_t *zone_find_by_name(char *);
 662  665  extern zone_t *zone_find_by_any_path(const char *, boolean_t);
 663  666  extern zone_t *zone_find_by_path(const char *);
 664  667  extern zoneid_t getzoneid(void);
 665  668  extern zone_t *zone_find_by_id_nolock(zoneid_t);
 666  669  extern int zone_datalink_walk(zoneid_t, int (*)(datalink_id_t, void *), void *);
 667  670  extern int zone_check_datalink(zoneid_t *, datalink_id_t);
 668  671  extern void zone_loadavg_update(void);
 669  672  
 670  673  /*
 671  674   * Zone-specific data (ZSD) APIs
 672  675   */
 673  676  /*
 674  677   * The following is what code should be initializing its zone_key_t to if it
 675  678   * calls zone_getspecific() without necessarily knowing that zone_key_create()
 676  679   * has been called on the key.
 677  680   */
 678  681  #define ZONE_KEY_UNINITIALIZED  0
 679  682  
 680  683  typedef uint_t zone_key_t;
 681  684  
 682  685  extern void     zone_key_create(zone_key_t *, void *(*)(zoneid_t),
 683  686      void (*)(zoneid_t, void *), void (*)(zoneid_t, void *));
 684  687  extern int      zone_key_delete(zone_key_t);
 685  688  extern void     *zone_getspecific(zone_key_t, zone_t *);
 686  689  extern int      zone_setspecific(zone_key_t, zone_t *, const void *);
 687  690  
 688  691  /*
 689  692   * The definition of a zsd_entry is truly private to zone.c and is only
 690  693   * placed here so it can be shared with mdb.
 691  694   *
 692  695   * State maintained for each zone times each registered key, which tracks
 693  696   * the state of the create, shutdown and destroy callbacks.
 694  697   *
 695  698   * zsd_flags is used to keep track of pending actions to avoid holding locks
 696  699   * when calling the create/shutdown/destroy callbacks, since doing so
 697  700   * could lead to deadlocks.
 698  701   */
 699  702  struct zsd_entry {
 700  703          zone_key_t              zsd_key;        /* Key used to look up value */
 701  704          void                    *zsd_data;      /* Caller-managed value */
 702  705          /*
 703  706           * Callbacks to be executed when a zone is created, shut down, and
 704  707           * destroyed, respectively.
 705  708           */
 706  709          void                    *(*zsd_create)(zoneid_t);
 707  710          void                    (*zsd_shutdown)(zoneid_t, void *);
 708  711          void                    (*zsd_destroy)(zoneid_t, void *);
 709  712          list_node_t             zsd_linkage;    /* List linkage */
 710  713          uint16_t                zsd_flags;      /* ZSD_* flags, see below */
 711  714          kcondvar_t              zsd_cv;         /* TODO confirm use in zone.c */
 712  715  };
 713  716  
 714  717  /*
 715  718   * zsd_flags
 716  719   */
 717  720  #define ZSD_CREATE_NEEDED       0x0001
 718  721  #define ZSD_CREATE_INPROGRESS   0x0002
 719  722  #define ZSD_CREATE_COMPLETED    0x0004
 720  723  #define ZSD_SHUTDOWN_NEEDED     0x0010
 721  724  #define ZSD_SHUTDOWN_INPROGRESS 0x0020
 722  725  #define ZSD_SHUTDOWN_COMPLETED  0x0040
 723  726  #define ZSD_DESTROY_NEEDED      0x0100
 724  727  #define ZSD_DESTROY_INPROGRESS  0x0200
 725  728  #define ZSD_DESTROY_COMPLETED   0x0400
 726  729  
 727  730  #define ZSD_CREATE_ALL  \
 728  731          (ZSD_CREATE_NEEDED|ZSD_CREATE_INPROGRESS|ZSD_CREATE_COMPLETED)
 729  732  #define ZSD_SHUTDOWN_ALL        \
 730  733          (ZSD_SHUTDOWN_NEEDED|ZSD_SHUTDOWN_INPROGRESS|ZSD_SHUTDOWN_COMPLETED)
 731  734  #define ZSD_DESTROY_ALL \
 732  735          (ZSD_DESTROY_NEEDED|ZSD_DESTROY_INPROGRESS|ZSD_DESTROY_COMPLETED)
 733  736  
 734  737  #define ZSD_ALL_INPROGRESS \
 735  738          (ZSD_CREATE_INPROGRESS|ZSD_SHUTDOWN_INPROGRESS|ZSD_DESTROY_INPROGRESS)
 736  739  
 737  740  /*
 738  741   * Macros to help with zone visibility restrictions.
 739  742   */
 740  743  
 741  744  /*
 742  745   * Is process in the global zone?
 743  746   */
 744  747  #define INGLOBALZONE(p) \
 745  748          ((p)->p_zone == global_zone)
 746  749  
 747  750  /*
 748  751   * Can process view objects in given zone?
 749  752   */
 750  753  #define HASZONEACCESS(p, zoneid) \
 751  754          ((p)->p_zone->zone_id == (zoneid) || INGLOBALZONE(p))
 752  755  
 753  756  /*
 754  757   * Convenience macro to see if a resolved path is visible from within a
 755  758   * given zone.
 756  759   *
 757  760   * The basic idea is that the first (zone_rootpathlen - 1) bytes of the
 758  761   * two strings must be equal.  Since the rootpathlen has a trailing '/',
 759  762   * we want to skip everything in the path up to (but not including) the
 760  763   * trailing '/'.
 761  764   */
 762  765  #define ZONE_PATH_VISIBLE(path, zone) \
 763  766          (strncmp((path), (zone)->zone_rootpath,         \
 764  767              (zone)->zone_rootpathlen - 1) == 0)
 765  768  
 766  769  /*
 767  770   * Convenience macro to go from the global view of a path to that seen
 768  771   * from within said zone.  It is the responsibility of the caller to
 769  772   * ensure that the path is a resolved one (i.e., no '..'s or '.'s), and is
 770  773   * in fact visible from within the zone.
 771  774   */
  
    | 
      ↓ open down ↓ | 
    121 lines elided | 
    
      ↑ open up ↑ | 
  
 772  775  #define ZONE_PATH_TRANSLATE(path, zone) \
 773  776          (ASSERT(ZONE_PATH_VISIBLE(path, zone)), \
 774  777          (path) + (zone)->zone_rootpathlen - 2)
 775  778  
 776  779  /*
 777  780   * Special processes visible in all zones.
 778  781   */
 779  782  #define ZONE_SPECIALPID(x)       ((x) == 0 || (x) == 1)
 780  783  
 781  784  /*
      785 + * The root vnode of the zone that the current process (curproc) is in.
      786 + */
      787 +#define ZONE_ROOTVP()   (curproc->p_zone->zone_rootvp)
      788 +
      789 +/*
 782  790   * Zone-safe version of thread_create() to be used when the caller wants to
 783  791   * create a kernel thread to run within the current zone's context.
 784  792   */
 785  793  extern kthread_t *zthread_create(caddr_t, size_t, void (*)(), void *, size_t,
 786  794      pri_t);
 787  795  extern void zthread_exit(void);
 788  796  
 789  797  /*
 790  798   * Functions for an external observer to register interest in a zone's status
 791  799   * change.  Observers will be woken up when the zone status equals the status
 792  800   * argument passed in (in the case of zone_status_timedwait, the function may
 793  801   * also return because of a timeout; zone_status_wait_sig may return early due
 794  802   * to a signal being delivered; zone_status_timedwait_sig may return for any of
 795  803   * the above reasons).
 796  804   *
 797  805   * Otherwise these behave identically to cv_timedwait(),
 798  806   * cv_timedwait_sig(), cv_wait(), and cv_wait_sig() respectively.
 799  807   */
 800  808  extern clock_t zone_status_timedwait(zone_t *, clock_t, zone_status_t);
 801  809  extern clock_t zone_status_timedwait_sig(zone_t *, clock_t, zone_status_t);
 802  810  extern void zone_status_wait(zone_t *, zone_status_t);
 803  811  extern int zone_status_wait_sig(zone_t *, zone_status_t);
 804  812  
 805  813  /*
 806  814   * Get the status of the zone (at the time it was called).  The state may
 807  815   * have progressed by the time it is returned.
 808  816   */
 809  817  extern zone_status_t zone_status_get(zone_t *);
 810  818  
 811  819  /*
 812  820   * Safely get the hostid of the specified zone (defaults to machine's hostid
 813  821   * if the specified zone doesn't emulate a hostid).  Passing NULL retrieves
 814  822   * the global zone's (i.e., physical system's) hostid.
 815  823   */
 816  824  extern uint32_t zone_get_hostid(zone_t *);
 817  825  
 818  826  /*
 819  827   * Get the "kcred" credentials corresponding to the given zone.
 820  828   */
 821  829  extern struct cred *zone_get_kcred(zoneid_t);
 822  830  
 823  831  /*
 824  832   * Get/set the pool the zone is currently bound to.
 825  833   */
 826  834  extern struct pool *zone_pool_get(zone_t *);
 827  835  extern void zone_pool_set(zone_t *, struct pool *);
 828  836  
 829  837  /*
 830  838   * Get/set the pset the zone is currently using.
 831  839   */
 832  840  extern psetid_t zone_pset_get(zone_t *);
 833  841  extern void zone_pset_set(zone_t *, psetid_t);
 834  842  
 835  843  /*
 836  844   * Get the number of cpus/online-cpus visible from the given zone.
 837  845   */
 838  846  extern int zone_ncpus_get(zone_t *);
 839  847  extern int zone_ncpus_online_get(zone_t *);
 840  848  
 841  849  /*
 842  850   * Returns true if the named pool/dataset is visible in the current zone.
 843  851   */
 844  852  extern int zone_dataset_visible(const char *, int *);
 845  853  
 846  854  /*
 847  855   * zone version of kadmin()
 848  856   */
 849  857  extern int zone_kadmin(int, int, const char *, cred_t *);
 850  858  extern void zone_shutdown_global(void);
 851  859  
 852  860  extern void mount_in_progress(zone_t *);
 853  861  extern void mount_completed(zone_t *);
 854  862  
 855  863  extern int zone_walk(int (*)(zone_t *, void *), void *);
 856  864  
 857  865  extern rctl_hndl_t rc_zone_locked_mem;
 858  866  extern rctl_hndl_t rc_zone_max_swap;
 859  867  extern rctl_hndl_t rc_zone_max_lofi;
 860  868  
 861  869  #endif  /* _KERNEL */
 862  870  
 863  871  #ifdef  __cplusplus
 864  872  }
 865  873  #endif
 866  874  
 867  875  #endif  /* _SYS_ZONE_H */
  
    | 
      ↓ open down ↓ | 
    76 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX