Print this page
Remove most KEBE comments and accompanying unused code or variables/fields.
zone.h mismerge
OS-338 Kstat counters to show "slow" VFS operations
OS-5187 improve /proc/diskstat handling
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
OS-5179 flatten zvol entries for /dev and /proc/partitions
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
OS-4915 want FX high priority zone configuration option
OS-4925 ps pri shows misleading value for zone in RT scheduling class
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
OS-4781 would like to be able to add CT_PR_EV_EXIT to fatal event set of current contract
Bad mismerge of zone.h
OS-3820 lxbrand ptrace(2): the next generation
OS-3685 lxbrand PTRACE_O_TRACEFORK race condition
OS-3834 lxbrand 64-bit strace(1) reports 64-bit process as using x32 ABI
OS-3794 lxbrand panic on init signal death
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Bryan Cantrill <bryan@joyent.com>
OS-3050 Would like boot_time and init_pid for zones exposed to GZ kstats
OS-3429 Expose zone's init exit status
OS-803 make phys mem cap a bit harder
OS-1043 minimize vm_getusage impact
OS-11 rcapd behaves poorly when under extreme load
OS-399 zone phys. mem. cap should be a rctl and have associated kstat
        
@@ -21,10 +21,11 @@
 /*
  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2015 Joyent, Inc. All rights reserved.
  * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
  * Copyright 2014 Igor Kozhukhov <ikozhukhov@gmail.com>.
+ * Copyright 2016, Joyent, Inc.
  */
 
 #ifndef _SYS_ZONE_H
 #define _SYS_ZONE_H
 
@@ -95,17 +96,23 @@
 #define ZONE_ATTR_INITPID       7
 #define ZONE_ATTR_SLBL          8
 #define ZONE_ATTR_INITNAME      9
 #define ZONE_ATTR_BOOTARGS      10
 #define ZONE_ATTR_BRAND         11
-#define ZONE_ATTR_PHYS_MCAP     12
+#define ZONE_ATTR_PMCAP_NOVER   12      /* presumably zone_mcap_nover */
 #define ZONE_ATTR_SCHED_CLASS   13
 #define ZONE_ATTR_FLAGS         14
 #define ZONE_ATTR_HOSTID        15
 #define ZONE_ATTR_FS_ALLOWED    16
 #define ZONE_ATTR_NETWORK       17
+#define ZONE_ATTR_DID           18
+#define ZONE_ATTR_PMCAP_PAGEOUT 19      /* presumably zone_mcap_pagedout */
 #define ZONE_ATTR_INITNORESTART 20
+#define ZONE_ATTR_PG_FLT_DELAY  21      /* presumably zone_pg_flt_delay */
+#define ZONE_ATTR_RSS           22      /* presumably zone_phys_mem (RSS) */
+#define ZONE_ATTR_APP_SVC_CT    23      /* presumably app. contract setup */
+#define ZONE_ATTR_SCHED_FIXEDHI 24      /* presumably zone_fixed_hipri */
 
 /* Start of the brand-specific attribute namespace */
 #define ZONE_ATTR_BRAND_ATTRS   32768
 
 #define ZONE_FS_ALLOWED_MAX     1024
@@ -242,13 +249,16 @@
  * The structure of a request to zoneadmd.
  */
 typedef struct zone_cmd_arg {
         uint64_t        uniqid;         /* unique "generation number" */
         zone_cmd_t      cmd;            /* requested action */
-        uint32_t        _pad;           /* need consistent 32/64 bit alignmt */
+        int             status;         /* init status on shutdown */
+        uint32_t        debug;          /* enable brand hook debug */
         char locale[MAXPATHLEN];        /* locale in which to render messages */
         char bootbuf[BOOTARGS_MAX];     /* arguments passed to zone_boot() */
+        /* Needed for 32/64 zoneadm -> zoneadmd door arg size check. */
+        int             pad;            /* size filler; see comment above */
 } zone_cmd_arg_t;
 
 /*
  * Structure of zoneadmd's response to a request.  A NULL return value means
  * the caller should attempt to restart zoneadmd and retry.
@@ -387,16 +397,55 @@
         uint_t          cycle_cnt;
         hrtime_t        zone_avg_cnt;
 } sys_zio_cntr_t;
 
 typedef struct {
+        kstat_named_t   zv_zonename;
+        kstat_named_t   zv_nread;       /* cf. kstat_io_t names; confirm */
+        kstat_named_t   zv_reads;
+        kstat_named_t   zv_rtime;
+        kstat_named_t   zv_rlentime;
+        kstat_named_t   zv_rcnt;
+        kstat_named_t   zv_nwritten;
+        kstat_named_t   zv_writes;
+        kstat_named_t   zv_wtime;
+        kstat_named_t   zv_wlentime;
+        kstat_named_t   zv_wcnt;
+        kstat_named_t   zv_10ms_ops;    /* 10ms..10s: presumably "slow" ops */
+        kstat_named_t   zv_100ms_ops;
+        kstat_named_t   zv_1s_ops;
+        kstat_named_t   zv_10s_ops;
+        kstat_named_t   zv_delay_cnt;
+        kstat_named_t   zv_delay_time;
+} zone_vfs_kstat_t;     /* type of zone_vfs_stats (VFS ops and bytes) */
+
+typedef struct {
+        kstat_named_t   zz_zonename;
+        kstat_named_t   zz_nread;       /* cf. kstat_io_t names; confirm */
+        kstat_named_t   zz_reads;
+        kstat_named_t   zz_rtime;
+        kstat_named_t   zz_rlentime;
+        kstat_named_t   zz_nwritten;
+        kstat_named_t   zz_writes;
+        kstat_named_t   zz_waittime;
+} zone_zfs_kstat_t;     /* type of zone_zfs_stats (ZFS I/O ops and bytes) */
+
+typedef struct {                        /* type of zone_mcap_stats */
         kstat_named_t   zm_zonename;
+        kstat_named_t   zm_rss;         /* presumably zone_phys_mem */
+        kstat_named_t   zm_phys_cap;    /* presumably zone_phys_mem_ctl */
+        kstat_named_t   zm_swap;
+        kstat_named_t   zm_swap_cap;
+        kstat_named_t   zm_nover;       /* presumably zone_mcap_nover */
+        kstat_named_t   zm_pagedout;    /* presumably zone_mcap_pagedout */
         kstat_named_t   zm_pgpgin;
         kstat_named_t   zm_anonpgin;
         kstat_named_t   zm_execpgin;
         kstat_named_t   zm_fspgin;
         kstat_named_t   zm_anon_alloc_fail;
+        kstat_named_t   zm_pf_throttle; /* presumably zone_pf_throttle */
+        kstat_named_t   zm_pf_throttle_usec; /* presumably usecs throttled */
 } zone_mcap_kstat_t;
 
 typedef struct {
         kstat_named_t   zm_zonename;    /* full name, kstat truncates name */
         kstat_named_t   zm_utime;
@@ -504,13 +553,14 @@
         list_t          zone_zsd;       /* list of Zone-Specific Data values */
         kcondvar_t      zone_cv;        /* used to signal state changes */
         struct proc     *zone_zsched;   /* Dummy kernel "zsched" process */
         pid_t           zone_proc_initpid; /* pid of "init" for this zone */
         char            *zone_initname; /* fs path to 'init' */
+        int             zone_init_status;       /* init's exit status */
         int             zone_boot_err;  /* for zone_boot() if boot fails */
         char            *zone_bootargs; /* arguments passed via zone_boot() */
-        uint64_t        zone_phys_mcap; /* physical memory cap */
+        rctl_qty_t      zone_phys_mem_ctl;      /* current phys. memory limit */
         /*
          * zone_kthreads is protected by zone_status_lock.
          */
         kthread_t       *zone_kthreads; /* kernel threads in zone */
         struct priv_set *zone_privset;  /* limit set for zone */
@@ -544,13 +594,16 @@
         ts_label_t      *zone_slabel;   /* zone sensitivity label */
         int             zone_match;     /* require label match for packets */
         tsol_mlp_list_t zone_mlps;      /* MLPs on zone-private addresses */
 
         boolean_t       zone_restart_init;      /* Restart init if it dies? */
+        boolean_t       zone_reboot_on_init_exit; /* Reboot if init dies? */
+        boolean_t       zone_setup_app_contract; /* setup contract? */
         struct brand    *zone_brand;            /* zone's brand */
         void            *zone_brand_data;       /* store brand specific data */
         id_t            zone_defaultcid;        /* dflt scheduling class id */
+        boolean_t       zone_fixed_hipri;       /* fixed sched. hi prio */
         kstat_t         *zone_swapresv_kstat;
         kstat_t         *zone_lockedmem_kstat;
         /*
          * zone_dl_list is protected by zone_lock
          */
@@ -571,10 +624,32 @@
         sys_zio_cntr_t  zone_rd_ops;            /* Counters for ZFS reads, */
         sys_zio_cntr_t  zone_wr_ops;            /* writes and */
         sys_zio_cntr_t  zone_lwr_ops;           /* logical writes. */
 
         /*
+         * kstats and counters for I/O ops and bytes.
+         */
+        kmutex_t        zone_io_lock;           /* protects I/O statistics */
+        kstat_t         *zone_io_ksp;
+        kstat_io_t      *zone_io_kiop;
+
+        /*
+         * kstats and counters for VFS ops and bytes.
+         */
+        kmutex_t        zone_vfs_lock;          /* protects VFS statistics */
+        kstat_t         *zone_vfs_ksp;
+        kstat_io_t      zone_vfs_rwstats;
+        zone_vfs_kstat_t *zone_vfs_stats;
+
+        /*
+         * kstats for ZFS I/O ops and bytes.
+         */
+        kmutex_t        zone_zfs_lock;          /* protects ZFS statistics */
+        kstat_io_t      zone_zfs_rwstats;
+        zone_zfs_kstat_t *zone_zfs_stats;
+
+        /*
          * Solaris Auditing per-zone audit context
          */
         struct au_kcontext      *zone_audit_kctxt;
         /*
          * For private use by mntfs.
@@ -588,19 +663,31 @@
         rctl_qty_t      zone_nprocs;    /* number of processes in the zone */
         rctl_qty_t      zone_nprocs_ctl;        /* current limit protected by */
                                                 /* zone_rctls->rcs_lock */
         kstat_t         *zone_nprocs_kstat;
 
+        /*
+         * kstats and counters for physical memory capping.
+         */
+        rctl_qty_t      zone_phys_mem;  /* current bytes of phys. mem. (RSS) */
+        kstat_t         *zone_physmem_kstat;
+        uint64_t        zone_mcap_nover;        /* # of times over phys. cap */
+        uint64_t        zone_mcap_pagedout;     /* bytes of mem. paged out */
         kmutex_t        zone_mcap_lock; /* protects mcap statistics */
         kstat_t         *zone_mcap_ksp;
         zone_mcap_kstat_t *zone_mcap_stats;
         uint64_t        zone_pgpgin;            /* pages paged in */
         uint64_t        zone_anonpgin;          /* anon pages paged in */
         uint64_t        zone_execpgin;          /* exec pages paged in */
         uint64_t        zone_fspgin;            /* fs pages paged in */
         uint64_t        zone_anon_alloc_fail;   /* cnt of anon alloc fails */
+        uint64_t        zone_pf_throttle;       /* cnt of page flt throttles */
+        uint64_t        zone_pf_throttle_usec;  /* usecs in page flt throttles */
 
+        /* Num usecs to throttle page fault when zone is over phys. mem cap */
+        uint32_t        zone_pg_flt_delay;
+
         /*
          * Misc. kstats and counters for zone cpu-usage aggregation.
          * The zone_Xtime values are the sum of the micro-state accounting
          * values for all threads that are running or have run in the zone.
          * This is tracked in msacct.c as threads change state.
@@ -857,10 +944,11 @@
 
 /*
  * Returns true if the named pool/dataset is visible in the current zone.
  */
 extern int zone_dataset_visible(const char *, int *);
+extern int zone_dataset_visible_inzone(zone_t *, const char *, int *);
 
 /*
  * zone version of kadmin()
  */
 extern int zone_kadmin(int, int, const char *, cred_t *);
@@ -871,10 +959,11 @@
 
 extern int zone_walk(int (*)(zone_t *, void *), void *);
 
 extern rctl_hndl_t rc_zone_locked_mem;
 extern rctl_hndl_t rc_zone_max_swap;
+extern rctl_hndl_t rc_zone_phys_mem;
 extern rctl_hndl_t rc_zone_max_lofi;
 
 #endif  /* _KERNEL */
 
 #ifdef  __cplusplus