Print this page
Remove most KEBE comments and accompanying unused code or variables/fields.
zone.h mismerge
OS-338 Kstat counters to show "slow" VFS operations
OS-5187 improve /proc/diskstat handling
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
OS-5179 flatten zvol entries for /dev and /proc/partitions
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
OS-4915 want FX high priority zone configuration option
OS-4925 ps pri shows misleading value for zone in RT scheduling class
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
OS-4781 would like to be able to add CT_PR_EV_EXIT to fatal event set of current contract
Bad mismerge of zone.h
OS-3820 lxbrand ptrace(2): the next generation
OS-3685 lxbrand PTRACE_O_TRACEFORK race condition
OS-3834 lxbrand 64-bit strace(1) reports 64-bit process as using x32 ABI
OS-3794 lxbrand panic on init signal death
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Bryan Cantrill <bryan@joyent.com>
OS-3050 Would like boot_time and init_pid for zones exposed to GZ kstats
OS-3429 Expose zone's init exit status
OS-803 make phys mem cap a bit harder
OS-1043 minimize vm_getusage impact
OS-11 rcapd behaves poorly when under extreme load
OS-399 zone phys. mem. cap should be a rctl and have associated kstat

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/sys/zone.h
          +++ new/usr/src/uts/common/sys/zone.h
↓ open down ↓ 15 lines elided ↑ open up ↑
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   * Copyright 2015 Joyent, Inc. All rights reserved.
  24   24   * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
  25   25   * Copyright 2014 Igor Kozhukhov <ikozhukhov@gmail.com>.
       26 + * Copyright 2016, Joyent, Inc.
  26   27   */
  27   28  
  28   29  #ifndef _SYS_ZONE_H
  29   30  #define _SYS_ZONE_H
  30   31  
  31   32  #include <sys/types.h>
  32   33  #include <sys/mutex.h>
  33   34  #include <sys/param.h>
  34   35  #include <sys/rctl.h>
  35   36  #include <sys/ipc_rctl.h>
↓ open down ↓ 54 lines elided ↑ open up ↑
  90   91  #define ZONE_ATTR_NAME          2
  91   92  #define ZONE_ATTR_STATUS        3
  92   93  #define ZONE_ATTR_PRIVSET       4
  93   94  #define ZONE_ATTR_UNIQID        5
  94   95  #define ZONE_ATTR_POOLID        6
  95   96  #define ZONE_ATTR_INITPID       7
  96   97  #define ZONE_ATTR_SLBL          8
  97   98  #define ZONE_ATTR_INITNAME      9
  98   99  #define ZONE_ATTR_BOOTARGS      10
  99  100  #define ZONE_ATTR_BRAND         11
 100      -#define ZONE_ATTR_PHYS_MCAP     12
      101 +#define ZONE_ATTR_PMCAP_NOVER   12
 101  102  #define ZONE_ATTR_SCHED_CLASS   13
 102  103  #define ZONE_ATTR_FLAGS         14
 103  104  #define ZONE_ATTR_HOSTID        15
 104  105  #define ZONE_ATTR_FS_ALLOWED    16
 105  106  #define ZONE_ATTR_NETWORK       17
      107 +#define ZONE_ATTR_DID           18
      108 +#define ZONE_ATTR_PMCAP_PAGEOUT 19
 106  109  #define ZONE_ATTR_INITNORESTART 20
      110 +#define ZONE_ATTR_PG_FLT_DELAY  21
      111 +#define ZONE_ATTR_RSS           22
      112 +#define ZONE_ATTR_APP_SVC_CT    23
      113 +#define ZONE_ATTR_SCHED_FIXEDHI 24
 107  114  
 108  115  /* Start of the brand-specific attribute namespace */
 109  116  #define ZONE_ATTR_BRAND_ATTRS   32768
 110  117  
 111  118  #define ZONE_FS_ALLOWED_MAX     1024
 112  119  
 113  120  #define ZONE_EVENT_CHANNEL      "com.sun:zones:status"
 114  121  #define ZONE_EVENT_STATUS_CLASS "status"
 115  122  #define ZONE_EVENT_STATUS_SUBCLASS      "change"
 116  123  
↓ open down ↓ 120 lines elided ↑ open up ↑
 237  244          Z_READY, Z_BOOT, Z_FORCEBOOT, Z_REBOOT, Z_HALT, Z_NOTE_UNINSTALLING,
 238  245          Z_MOUNT, Z_FORCEMOUNT, Z_UNMOUNT, Z_SHUTDOWN
 239  246  } zone_cmd_t;
 240  247  
 241  248  /*
 242  249   * The structure of a request to zoneadmd.
 243  250   */
 244  251  typedef struct zone_cmd_arg {
 245  252          uint64_t        uniqid;         /* unique "generation number" */
 246  253          zone_cmd_t      cmd;            /* requested action */
 247      -        uint32_t        _pad;           /* need consistent 32/64 bit alignmt */
      254 +        int             status;         /* init status on shutdown */
      255 +        uint32_t        debug;          /* enable brand hook debug */
 248  256          char locale[MAXPATHLEN];        /* locale in which to render messages */
 249  257          char bootbuf[BOOTARGS_MAX];     /* arguments passed to zone_boot() */
      258 +        /* Needed for 32/64 zoneadm -> zoneadmd door arg size check. */
      259 +        int             pad;
 250  260  } zone_cmd_arg_t;
 251  261  
 252  262  /*
 253  263   * Structure of zoneadmd's response to a request.  A NULL return value means
 254  264   * the caller should attempt to restart zoneadmd and retry.
 255  265   */
 256  266  typedef struct zone_cmd_rval {
 257  267          int rval;                       /* return value of request */
 258  268          char errbuf[1]; /* variable-sized buffer containing error messages */
 259  269  } zone_cmd_rval_t;
↓ open down ↓ 122 lines elided ↑ open up ↑
 382  392  
 383  393  struct cpucap;
 384  394  
 385  395  typedef struct {
 386  396          hrtime_t        cycle_start;
 387  397          uint_t          cycle_cnt;
 388  398          hrtime_t        zone_avg_cnt;
 389  399  } sys_zio_cntr_t;
 390  400  
 391  401  typedef struct {
      402 +        kstat_named_t   zv_zonename;
      403 +        kstat_named_t   zv_nread;
      404 +        kstat_named_t   zv_reads;
      405 +        kstat_named_t   zv_rtime;
      406 +        kstat_named_t   zv_rlentime;
      407 +        kstat_named_t   zv_rcnt;
      408 +        kstat_named_t   zv_nwritten;
      409 +        kstat_named_t   zv_writes;
      410 +        kstat_named_t   zv_wtime;
      411 +        kstat_named_t   zv_wlentime;
      412 +        kstat_named_t   zv_wcnt;
      413 +        kstat_named_t   zv_10ms_ops;
      414 +        kstat_named_t   zv_100ms_ops;
      415 +        kstat_named_t   zv_1s_ops;
      416 +        kstat_named_t   zv_10s_ops;
      417 +        kstat_named_t   zv_delay_cnt;
      418 +        kstat_named_t   zv_delay_time;
      419 +} zone_vfs_kstat_t;
      420 +
      421 +typedef struct {
      422 +        kstat_named_t   zz_zonename;
      423 +        kstat_named_t   zz_nread;
      424 +        kstat_named_t   zz_reads;
      425 +        kstat_named_t   zz_rtime;
      426 +        kstat_named_t   zz_rlentime;
      427 +        kstat_named_t   zz_nwritten;
      428 +        kstat_named_t   zz_writes;
      429 +        kstat_named_t   zz_waittime;
      430 +} zone_zfs_kstat_t;
      431 +
      432 +typedef struct {
 392  433          kstat_named_t   zm_zonename;
      434 +        kstat_named_t   zm_rss;
      435 +        kstat_named_t   zm_phys_cap;
      436 +        kstat_named_t   zm_swap;
      437 +        kstat_named_t   zm_swap_cap;
      438 +        kstat_named_t   zm_nover;
      439 +        kstat_named_t   zm_pagedout;
 393  440          kstat_named_t   zm_pgpgin;
 394  441          kstat_named_t   zm_anonpgin;
 395  442          kstat_named_t   zm_execpgin;
 396  443          kstat_named_t   zm_fspgin;
 397  444          kstat_named_t   zm_anon_alloc_fail;
      445 +        kstat_named_t   zm_pf_throttle;
      446 +        kstat_named_t   zm_pf_throttle_usec;
 398  447  } zone_mcap_kstat_t;
 399  448  
 400  449  typedef struct {
 401  450          kstat_named_t   zm_zonename;    /* full name, kstat truncates name */
 402  451          kstat_named_t   zm_utime;
 403  452          kstat_named_t   zm_stime;
 404  453          kstat_named_t   zm_wtime;
 405  454          kstat_named_t   zm_avenrun1;
 406  455          kstat_named_t   zm_avenrun5;
 407  456          kstat_named_t   zm_avenrun15;
↓ open down ↓ 90 lines elided ↑ open up ↑
 498  547                                                  /* zone_rctls->rcs_lock */
 499  548          kmutex_t        zone_rctl_lock; /* protects zone_max_lofi */
 500  549          rctl_qty_t      zone_max_lofi; /* lofi devs for zone */
 501  550          rctl_qty_t      zone_max_lofi_ctl;      /* current lofi limit. */
 502  551                                                  /* Protected by */
 503  552                                                  /* zone_rctls->rcs_lock */
 504  553          list_t          zone_zsd;       /* list of Zone-Specific Data values */
 505  554          kcondvar_t      zone_cv;        /* used to signal state changes */
 506  555          struct proc     *zone_zsched;   /* Dummy kernel "zsched" process */
 507  556          pid_t           zone_proc_initpid; /* pid of "init" for this zone */
 508      -        char            *zone_initname; /* fs path to 'init' */
      557 +        char            *zone_initname;         /* fs path to 'init' */
      558 +        int             zone_init_status;       /* init's exit status */
 509  559          int             zone_boot_err;  /* for zone_boot() if boot fails */
 510  560          char            *zone_bootargs; /* arguments passed via zone_boot() */
 511      -        uint64_t        zone_phys_mcap; /* physical memory cap */
      561 +        rctl_qty_t      zone_phys_mem_ctl;      /* current phys. memory limit */
 512  562          /*
 513  563           * zone_kthreads is protected by zone_status_lock.
 514  564           */
 515  565          kthread_t       *zone_kthreads; /* kernel threads in zone */
 516  566          struct priv_set *zone_privset;  /* limit set for zone */
 517  567          /*
 518  568           * zone_vfslist is protected by vfs_list_lock().
 519  569           */
 520  570          struct vfs      *zone_vfslist;  /* list of FS's mounted in zone */
 521  571          uint64_t        zone_uniqid;    /* unique zone generation number */
↓ open down ↓ 17 lines elided ↑ open up ↑
 539  589          /*
 540  590           * List of ZFS datasets exported to this zone.
 541  591           */
 542  592          list_t          zone_datasets;  /* list of datasets */
 543  593  
 544  594          ts_label_t      *zone_slabel;   /* zone sensitivity label */
 545  595          int             zone_match;     /* require label match for packets */
 546  596          tsol_mlp_list_t zone_mlps;      /* MLPs on zone-private addresses */
 547  597  
 548  598          boolean_t       zone_restart_init;      /* Restart init if it dies? */
      599 +        boolean_t       zone_reboot_on_init_exit; /* Reboot if init dies? */
      600 +        boolean_t       zone_setup_app_contract; /* set up app contract? */
 549  601          struct brand    *zone_brand;            /* zone's brand */
 550  602          void            *zone_brand_data;       /* store brand specific data */
 551  603          id_t            zone_defaultcid;        /* dflt scheduling class id */
      604 +        boolean_t       zone_fixed_hipri;       /* fixed sched. hi prio */
 552  605          kstat_t         *zone_swapresv_kstat;
 553  606          kstat_t         *zone_lockedmem_kstat;
 554  607          /*
 555  608           * zone_dl_list is protected by zone_lock
 556  609           */
 557  610          list_t          zone_dl_list;
 558  611          netstack_t      *zone_netstack;
 559  612          struct cpucap   *zone_cpucap;   /* CPU caps data */
 560  613  
 561  614          /*
↓ open down ↓ 4 lines elided ↑ open up ↑
 566  619          uint64_t        zone_zfs_weight;        /* used to prevent starvation */
 567  620          uint64_t        zone_io_util;           /* IO utilization metric */
 568  621          boolean_t       zone_io_util_above_avg; /* IO util percent > avg. */
 569  622          uint16_t        zone_io_delay;          /* IO delay on logical r/w */
 570  623          kmutex_t        zone_stg_io_lock;       /* protects IO window data */
 571  624          sys_zio_cntr_t  zone_rd_ops;            /* Counters for ZFS reads, */
 572  625          sys_zio_cntr_t  zone_wr_ops;            /* writes and */
 573  626          sys_zio_cntr_t  zone_lwr_ops;           /* logical writes. */
 574  627  
 575  628          /*
      629 +         * kstats and counters for I/O ops and bytes.
      630 +         */
      631 +        kmutex_t        zone_io_lock;           /* protects I/O statistics */
      632 +        kstat_t         *zone_io_ksp;
      633 +        kstat_io_t      *zone_io_kiop;
      634 +
      635 +        /*
      636 +         * kstats and counters for VFS ops and bytes.
      637 +         */
      638 +        kmutex_t        zone_vfs_lock;          /* protects VFS statistics */
      639 +        kstat_t         *zone_vfs_ksp;
      640 +        kstat_io_t      zone_vfs_rwstats;
      641 +        zone_vfs_kstat_t *zone_vfs_stats;
      642 +
      643 +        /*
      644 +         * kstats for ZFS I/O ops and bytes.
      645 +         */
      646 +        kmutex_t        zone_zfs_lock;          /* protects ZFS statistics */
      647 +        kstat_io_t      zone_zfs_rwstats;
      648 +        zone_zfs_kstat_t *zone_zfs_stats;
      649 +
      650 +        /*
 576  651           * Solaris Auditing per-zone audit context
 577  652           */
 578  653          struct au_kcontext      *zone_audit_kctxt;
 579  654          /*
 580  655           * For private use by mntfs.
 581  656           */
 582  657          struct mntelem  *zone_mntfs_db;
 583  658          krwlock_t       zone_mntfs_db_lock;
 584  659  
 585  660          struct klpd_reg         *zone_pfexecd;
 586  661  
 587  662          char            *zone_fs_allowed;
 588  663          rctl_qty_t      zone_nprocs;    /* number of processes in the zone */
 589  664          rctl_qty_t      zone_nprocs_ctl;        /* current limit protected by */
 590  665                                                  /* zone_rctls->rcs_lock */
 591  666          kstat_t         *zone_nprocs_kstat;
 592  667  
      668 +        /*
      669 +         * kstats and counters for physical memory capping.
      670 +         */
      671 +        rctl_qty_t      zone_phys_mem;  /* current bytes of phys. mem. (RSS) */
      672 +        kstat_t         *zone_physmem_kstat;
      673 +        uint64_t        zone_mcap_nover;        /* # of times over phys. cap */
      674 +        uint64_t        zone_mcap_pagedout;     /* bytes of mem. paged out */
 593  675          kmutex_t        zone_mcap_lock; /* protects mcap statistics */
 594  676          kstat_t         *zone_mcap_ksp;
 595  677          zone_mcap_kstat_t *zone_mcap_stats;
 596  678          uint64_t        zone_pgpgin;            /* pages paged in */
 597  679          uint64_t        zone_anonpgin;          /* anon pages paged in */
 598  680          uint64_t        zone_execpgin;          /* exec pages paged in */
 599  681          uint64_t        zone_fspgin;            /* fs pages paged in */
 600  682          uint64_t        zone_anon_alloc_fail;   /* cnt of anon alloc fails */
      683 +        uint64_t        zone_pf_throttle;       /* cnt of page flt throttles */
      684 +        uint64_t        zone_pf_throttle_usec;  /* usecs spent in page flt throttles */
 601  685  
      686 +        /* Num usecs to throttle page fault when zone is over phys. mem cap */
      687 +        uint32_t        zone_pg_flt_delay;
      688 +
 602  689          /*
 603  690           * Misc. kstats and counters for zone cpu-usage aggregation.
 604  691           * The zone_Xtime values are the sum of the micro-state accounting
 605  692           * values for all threads that are running or have run in the zone.
 606  693           * This is tracked in msacct.c as threads change state.
 607  694           * The zone_stime is the sum of the LMS_SYSTEM times.
 608  695           * The zone_utime is the sum of the LMS_USER times.
 609  696           * The zone_wtime is the sum of the LMS_WAIT_CPU times.
 610  697           * As with per-thread micro-state accounting values, these values are
 611  698           * not scaled to nanosecs.  The scaling is done by the
↓ open down ↓ 240 lines elided ↑ open up ↑
 852  939  /*
 853  940   * Get the number of cpus/online-cpus visible from the given zone.
 854  941   */
 855  942  extern int zone_ncpus_get(zone_t *);
 856  943  extern int zone_ncpus_online_get(zone_t *);
 857  944  
 858  945  /*
 859  946   * Returns true if the named pool/dataset is visible in the current zone.
 860  947   */
 861  948  extern int zone_dataset_visible(const char *, int *);
      949 +extern int zone_dataset_visible_inzone(zone_t *, const char *, int *);
 862  950  
 863  951  /*
 864  952   * zone version of kadmin()
 865  953   */
 866  954  extern int zone_kadmin(int, int, const char *, cred_t *);
 867  955  extern void zone_shutdown_global(void);
 868  956  
 869  957  extern void mount_in_progress(zone_t *);
 870  958  extern void mount_completed(zone_t *);
 871  959  
 872  960  extern int zone_walk(int (*)(zone_t *, void *), void *);
 873  961  
 874  962  extern rctl_hndl_t rc_zone_locked_mem;
 875  963  extern rctl_hndl_t rc_zone_max_swap;
      964 +extern rctl_hndl_t rc_zone_phys_mem;
 876  965  extern rctl_hndl_t rc_zone_max_lofi;
 877  966  
 878  967  #endif  /* _KERNEL */
 879  968  
 880  969  #ifdef  __cplusplus
 881  970  }
 882  971  #endif
 883  972  
 884  973  #endif  /* _SYS_ZONE_H */
    
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX