@@ -373,12 +373,10 @@
 rctl_hndl_t rc_zone_locked_mem;
 rctl_hndl_t rc_zone_max_swap;
 rctl_hndl_t rc_zone_phys_mem;
 rctl_hndl_t rc_zone_max_lofi;
 rctl_hndl_t rc_zone_cpu_cap;
-rctl_hndl_t rc_zone_cpu_baseline;
-rctl_hndl_t rc_zone_cpu_burst_time;
 rctl_hndl_t rc_zone_zfs_io_pri;
 rctl_hndl_t rc_zone_nlwps;
 rctl_hndl_t rc_zone_nprocs;
 rctl_hndl_t rc_zone_shmmax;
 rctl_hndl_t rc_zone_shmmni;

@@ -421,13 +419,12 @@
  * Version 4 alters the zone_create system call in order to support
  *     Trusted Extensions.
  * Version 5 alters the zone_boot system call, and converts its old
  *     bootargs parameter to be set by the zone_setattr API instead.
  * Version 6 adds the flag argument to zone_create.
- * Version 7 adds the requested zoneid to zone_create.
  */
-static const int ZONE_SYSCALL_API_VERSION = 7;
+static const int ZONE_SYSCALL_API_VERSION = 6;
 
 /*
  * Certain filesystems (such as NFS and autofs) need to know which zone
  * the mount is being placed in.  Because of this, we need to be able to
  * ensure that a zone isn't in the process of being created/destroyed such

@@ -1380,81 +1377,11 @@
         zone_cpu_cap_get,
         zone_cpu_cap_set,
         rcop_no_test
 };
 
-/*ARGSUSED*/
-static rctl_qty_t
-zone_cpu_base_get(rctl_t *rctl, struct proc *p)
-{
-        ASSERT(MUTEX_HELD(&p->p_lock));
-        return (cpucaps_zone_get_base(p->p_zone));
-}
-
 /*
- * The zone cpu base is used to set the baseline CPU for the zone
- * so we can track when the zone is bursting.
- */
-/*ARGSUSED*/
-static int
-zone_cpu_base_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e,
-    rctl_qty_t nv)
-{
-        zone_t *zone = e->rcep_p.zone;
-
-        ASSERT(MUTEX_HELD(&p->p_lock));
-        ASSERT(e->rcep_t == RCENTITY_ZONE);
-
-        if (zone == NULL)
-                return (0);
-
-        return (cpucaps_zone_set_base(zone, nv));
-}
-
-static rctl_ops_t zone_cpu_base_ops = {
-        rcop_no_action,
-        zone_cpu_base_get,
-        zone_cpu_base_set,
-        rcop_no_test
-};
-
-/*ARGSUSED*/
-static rctl_qty_t
-zone_cpu_burst_time_get(rctl_t *rctl, struct proc *p)
-{
-        ASSERT(MUTEX_HELD(&p->p_lock));
-        return (cpucaps_zone_get_burst_time(p->p_zone));
-}
-
-/*
- * The zone cpu burst time is used to set the amount of time CPU(s) can be
- * bursting for the zone.
- */
-/*ARGSUSED*/
-static int
-zone_cpu_burst_time_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e,
-    rctl_qty_t nv)
-{
-        zone_t *zone = e->rcep_p.zone;
-
-        ASSERT(MUTEX_HELD(&p->p_lock));
-        ASSERT(e->rcep_t == RCENTITY_ZONE);
-
-        if (zone == NULL)
-                return (0);
-
-        return (cpucaps_zone_set_burst_time(zone, nv));
-}
-
-static rctl_ops_t zone_cpu_burst_time_ops = {
-        rcop_no_action,
-        zone_cpu_burst_time_get,
-        zone_cpu_burst_time_set,
-        rcop_no_test
-};
-
-/*
  * zone.zfs-io-pri resource control support (IO priority).
  */
 /*ARGSUSED*/
 static rctl_qty_t
 zone_zfs_io_pri_get(rctl_t *rctl, struct proc *p)

@@ -1984,11 +1911,11 @@
         zk->zk_value.value.ui64 = zone->zone_max_swap_ctl;
         return (0);
 }
 
 static kstat_t *
-zone_rctl_kstat_create_common(zone_t *zone, char *name,
+zone_kstat_create_common(zone_t *zone, char *name,
     int (*updatefunc) (kstat_t *, int))
 {
         kstat_t *ksp;
         zone_kstat_t *zk;
 

@@ -2213,10 +2140,11 @@
         zone->zone_mcap_stats = zmp;
 
         /* The kstat "name" field is not large enough for a full zonename */
         kstat_named_init(&zmp->zm_zonename, "zonename", KSTAT_DATA_STRING);
         kstat_named_setstr(&zmp->zm_zonename, zone->zone_name);
+        kstat_named_setstr(&zmp->zm_zonename, zone->zone_name);
         kstat_named_init(&zmp->zm_rss, "rss", KSTAT_DATA_UINT64);
         kstat_named_init(&zmp->zm_phys_cap, "physcap", KSTAT_DATA_UINT64);
         kstat_named_init(&zmp->zm_swap, "swap", KSTAT_DATA_UINT64);
         kstat_named_init(&zmp->zm_swap_cap, "swapcap", KSTAT_DATA_UINT64);
         kstat_named_init(&zmp->zm_nover, "nover", KSTAT_DATA_UINT64);

@@ -2266,12 +2194,10 @@
         zmp->zm_ffcap.value.ui32 = zone->zone_ffcap;
         zmp->zm_ffnoproc.value.ui32 = zone->zone_ffnoproc;
         zmp->zm_ffnomem.value.ui32 = zone->zone_ffnomem;
         zmp->zm_ffmisc.value.ui32 = zone->zone_ffmisc;
 
-        zmp->zm_mfseglim.value.ui32 = zone->zone_mfseglim;
-
         zmp->zm_nested_intp.value.ui32 = zone->zone_nested_intp;
 
         zmp->zm_init_pid.value.ui32 = zone->zone_proc_initpid;
         zmp->zm_boot_time.value.ui64 = (uint64_t)zone->zone_boot_time;
 

@@ -2311,12 +2237,10 @@
         kstat_named_init(&zmp->zm_ffcap, "forkfail_cap", KSTAT_DATA_UINT32);
         kstat_named_init(&zmp->zm_ffnoproc, "forkfail_noproc",
             KSTAT_DATA_UINT32);
         kstat_named_init(&zmp->zm_ffnomem, "forkfail_nomem", KSTAT_DATA_UINT32);
         kstat_named_init(&zmp->zm_ffmisc, "forkfail_misc", KSTAT_DATA_UINT32);
-        kstat_named_init(&zmp->zm_mfseglim, "mapfail_seglim",
-            KSTAT_DATA_UINT32);
         kstat_named_init(&zmp->zm_nested_intp, "nested_interp",
             KSTAT_DATA_UINT32);
         kstat_named_init(&zmp->zm_init_pid, "init_pid", KSTAT_DATA_UINT32);
         kstat_named_init(&zmp->zm_boot_time, "boot_time", KSTAT_DATA_UINT64);
 

@@ -2328,38 +2252,34 @@
 }
 
 static void
 zone_kstat_create(zone_t *zone)
 {
-        zone->zone_lockedmem_kstat = zone_rctl_kstat_create_common(zone,
+        zone->zone_lockedmem_kstat = zone_kstat_create_common(zone,
             "lockedmem", zone_lockedmem_kstat_update);
-        zone->zone_swapresv_kstat = zone_rctl_kstat_create_common(zone,
+        zone->zone_swapresv_kstat = zone_kstat_create_common(zone,
             "swapresv", zone_swapresv_kstat_update);
-        zone->zone_physmem_kstat = zone_rctl_kstat_create_common(zone,
+        zone->zone_physmem_kstat = zone_kstat_create_common(zone,
             "physicalmem", zone_physmem_kstat_update);
-        zone->zone_nprocs_kstat = zone_rctl_kstat_create_common(zone,
+        zone->zone_nprocs_kstat = zone_kstat_create_common(zone,
             "nprocs", zone_nprocs_kstat_update);
 
         if ((zone->zone_vfs_ksp = zone_vfs_kstat_create(zone)) == NULL) {
                 zone->zone_vfs_stats = kmem_zalloc(
                     sizeof (zone_vfs_kstat_t), KM_SLEEP);
         }
 
-        if ((zone->zone_zfs_ksp = zone_zfs_kstat_create(zone)) == NULL) {
-                zone->zone_zfs_stats = kmem_zalloc(
-                    sizeof (zone_zfs_kstat_t), KM_SLEEP);
-        }
-
         if ((zone->zone_mcap_ksp = zone_mcap_kstat_create(zone)) == NULL) {
                 zone->zone_mcap_stats = kmem_zalloc(
                     sizeof (zone_mcap_kstat_t), KM_SLEEP);
         }
 
         if ((zone->zone_misc_ksp = zone_misc_kstat_create(zone)) == NULL) {
                 zone->zone_misc_stats = kmem_zalloc(
                     sizeof (zone_misc_kstat_t), KM_SLEEP);
         }
+
 }
 
 static void
 zone_kstat_delete_common(kstat_t **pkstat, size_t datasz)
 {

@@ -2385,16 +2305,15 @@
         zone_kstat_delete_common(&zone->zone_nprocs_kstat,
             sizeof (zone_kstat_t));
 
         zone_kstat_delete_common(&zone->zone_vfs_ksp,
             sizeof (zone_vfs_kstat_t));
-        zone_kstat_delete_common(&zone->zone_zfs_ksp,
-            sizeof (zone_zfs_kstat_t));
         zone_kstat_delete_common(&zone->zone_mcap_ksp,
             sizeof (zone_mcap_kstat_t));
         zone_kstat_delete_common(&zone->zone_misc_ksp,
             sizeof (zone_misc_kstat_t));
+
 }
 
 /*
  * Called very early on in boot to initialize the ZSD list so that
  * zone_key_create() can be called before zone_init().  It also initializes

@@ -2452,10 +2371,11 @@
         zone0.zone_lockedmem_kstat = NULL;
         zone0.zone_swapresv_kstat = NULL;
         zone0.zone_physmem_kstat = NULL;
         zone0.zone_nprocs_kstat = NULL;
         zone0.zone_zfs_io_pri = 1;
+
         zone0.zone_stime = 0;
         zone0.zone_utime = 0;
         zone0.zone_wtime = 0;
 
         list_create(&zone0.zone_ref_list, sizeof (zone_ref_t),

@@ -2562,20 +2482,10 @@
             RCENTITY_ZONE, RCTL_GLOBAL_SIGNAL_NEVER | RCTL_GLOBAL_DENY_ALWAYS |
             RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT |RCTL_GLOBAL_SYSLOG_NEVER |
             RCTL_GLOBAL_INFINITE,
             MAXCAP, MAXCAP, &zone_cpu_cap_ops);
 
-        rc_zone_cpu_baseline = rctl_register("zone.cpu-baseline",
-            RCENTITY_ZONE, RCTL_GLOBAL_SIGNAL_NEVER | RCTL_GLOBAL_DENY_NEVER |
-            RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT | RCTL_GLOBAL_SYSLOG_NEVER,
-            MAXCAP, MAXCAP, &zone_cpu_base_ops);
-
-        rc_zone_cpu_burst_time = rctl_register("zone.cpu-burst-time",
-            RCENTITY_ZONE, RCTL_GLOBAL_SIGNAL_NEVER | RCTL_GLOBAL_DENY_NEVER |
-            RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT | RCTL_GLOBAL_SYSLOG_NEVER,
-            INT_MAX, INT_MAX, &zone_cpu_burst_time_ops);
-
         rc_zone_zfs_io_pri = rctl_register("zone.zfs-io-priority",
             RCENTITY_ZONE, RCTL_GLOBAL_SIGNAL_NEVER | RCTL_GLOBAL_DENY_NEVER |
             RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT | RCTL_GLOBAL_SYSLOG_NEVER,
             16384, 16384, &zone_zfs_io_pri_ops);
 

@@ -2618,24 +2528,10 @@
         dval->rcv_action_recip_pid = -1;
 
         rde = rctl_dict_lookup("zone.cpu-shares");
         (void) rctl_val_list_insert(&rde->rcd_default_value, dval);
 
-        /*
-         * Create a rctl_val with PRIVILEGED, NOACTION, value = 1.  Then attach
-         * this at the head of the rctl_dict_entry for ``zone.zfs-io-priority'.
-         */
-        dval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
-        bzero(dval, sizeof (rctl_val_t));
-        dval->rcv_value = 1;
-        dval->rcv_privilege = RCPRIV_PRIVILEGED;
-        dval->rcv_flagaction = RCTL_LOCAL_NOACTION;
-        dval->rcv_action_recip_pid = -1;
-
-        rde = rctl_dict_lookup("zone.zfs-io-priority");
-        (void) rctl_val_list_insert(&rde->rcd_default_value, dval);
-
         rc_zone_locked_mem = rctl_register("zone.max-locked-memory",
             RCENTITY_ZONE, RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_BYTES |
             RCTL_GLOBAL_DENY_ALWAYS, UINT64_MAX, UINT64_MAX,
             &zone_locked_mem_ops);
 

@@ -2836,30 +2732,24 @@
  * Convenience function for setting zone status.
  */
 static void
 zone_status_set(zone_t *zone, zone_status_t status)
 {
-        timestruc_t now;
-        uint64_t t;
 
         nvlist_t *nvl = NULL;
         ASSERT(MUTEX_HELD(&zone_status_lock));
         ASSERT(status > ZONE_MIN_STATE && status <= ZONE_MAX_STATE &&
             status >= zone_status_get(zone));
 
-        /* Current time since Jan 1 1970 but consumers expect NS */
-        gethrestime(&now);
-        t = (now.tv_sec * NANOSEC) + now.tv_nsec;
-
         if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) ||
             nvlist_add_string(nvl, ZONE_CB_NAME, zone->zone_name) ||
             nvlist_add_string(nvl, ZONE_CB_NEWSTATE,
             zone_status_table[status]) ||
             nvlist_add_string(nvl, ZONE_CB_OLDSTATE,
             zone_status_table[zone->zone_status]) ||
             nvlist_add_int32(nvl, ZONE_CB_ZONEID, zone->zone_id) ||
-            nvlist_add_uint64(nvl, ZONE_CB_TIMESTAMP, t) ||
+            nvlist_add_uint64(nvl, ZONE_CB_TIMESTAMP, (uint64_t)gethrtime()) ||
             sysevent_evc_publish(zone_event_chan, ZONE_EVENT_STATUS_CLASS,
             ZONE_EVENT_STATUS_SUBCLASS, "sun.com", "kernel", nvl, EVCH_SLEEP)) {
 #ifdef DEBUG
                 (void) printf(
                     "Failed to allocate and send zone state change event.\n");

@@ -3456,16 +3346,10 @@
 getzoneid(void)
 {
         return (curproc->p_zone->zone_id);
 }
 
-zoneid_t
-getzonedid(void)
-{
-        return (curproc->p_zone->zone_did);
-}
-
 /*
  * Internal versions of zone_find_by_*().  These don't zone_hold() or
  * check the validity of a zone's state.
  */
 static zone_t *

@@ -4806,12 +4690,11 @@
                 nvlist_free(nvl);
         return (error);
 }
 
 int
-zone_create_error(int er_error, int er_ext, int *er_out)
-{
+zone_create_error(int er_error, int er_ext, int *er_out) {
         if (er_out != NULL) {
                 if (copyout(&er_ext, er_out, sizeof (int))) {
                         return (set_errno(EFAULT));
                 }
         }

@@ -4897,17 +4780,17 @@
 zone_create(const char *zone_name, const char *zone_root,
     const priv_set_t *zone_privs, size_t zone_privssz,
     caddr_t rctlbuf, size_t rctlbufsz,
     caddr_t zfsbuf, size_t zfsbufsz, int *extended_error,
     int match, uint32_t doi, const bslabel_t *label,
-    int flags, zoneid_t zone_did)
+    int flags)
 {
         struct zsched_arg zarg;
         nvlist_t *rctls = NULL;
         proc_t *pp = curproc;
         zone_t *zone, *ztmp;
-        zoneid_t zoneid, start = GLOBAL_ZONEID;
+        zoneid_t zoneid;
         int error;
         int error2 = 0;
         char *str;
         cred_t *zkcr;
         boolean_t insert_label_hash;

@@ -4918,63 +4801,12 @@
         /* can't boot zone from within chroot environment */
         if (PTOU(pp)->u_rdir != NULL && PTOU(pp)->u_rdir != rootdir)
                 return (zone_create_error(ENOTSUP, ZE_CHROOTED,
                     extended_error));
 
-        /*
-         * As the first step of zone creation, we want to allocate a zoneid.
-         * This allocation is complicated by the fact that netstacks use the
-         * zoneid to determine their stackid, but netstacks themselves are
-         * freed asynchronously with respect to zone destruction.  This means
-         * that a netstack reference leak (or in principle, an extraordinarily
-         * long netstack reference hold) could result in a zoneid being
-         * allocated that in fact corresponds to a stackid from an active
-         * (referenced) netstack -- unleashing all sorts of havoc when that
-         * netstack is actually (re)used.  (In the abstract, we might wish a
-         * zoneid to not be deallocated until its last referencing netstack
-         * has been released, but netstacks lack a backpointer into their
-         * referencing zone -- and changing them to have such a pointer would
-         * be substantial, to put it euphemistically.)  To avoid this, we
-         * detect this condition on allocation: if we have allocated a zoneid
-         * that corresponds to a netstack that's still in use, we warn about
-         * it (as it is much more likely to be a reference leak than an actual
-         * netstack reference), free it, and allocate another.  That these
-         * identifiers are allocated out of an ID space assures that we won't
-         * see the identifier we just allocated.
-         */
-        for (;;) {
-                zoneid = id_alloc(zoneid_space);
-
-                if (!netstack_inuse_by_stackid(zoneid_to_netstackid(zoneid)))
-                        break;
-
-                id_free(zoneid_space, zoneid);
-
-                if (start == GLOBAL_ZONEID) {
-                        start = zoneid;
-                } else if (zoneid == start) {
-                        /*
-                         * We have managed to iterate over the entire available
-                         * zoneid space -- there are no identifiers available,
-                         * presumably due to some number of leaked netstack
-                         * references.  While it's in principle possible for us
-                         * to continue to try, it seems wiser to give up at
-                         * this point to warn and fail explicitly with a
-                         * distinctive error.
-                         */
-                        cmn_err(CE_WARN, "zone_create() failed: all available "
-                            "zone IDs have netstacks still in use");
-                        return (set_errno(ENFILE));
-                }
-
-                cmn_err(CE_WARN, "unable to reuse zone ID %d; "
-                    "netstack still in use", zoneid);
-        }
-
         zone = kmem_zalloc(sizeof (zone_t), KM_SLEEP);
-        zone->zone_id = zoneid;
-        zone->zone_did = zone_did;
+        zoneid = zone->zone_id = id_alloc(zoneid_space);
         zone->zone_status = ZONE_IS_UNINITIALIZED;
         zone->zone_pool = pool_default;
         zone->zone_pool_mod = gethrtime();
         zone->zone_psetid = ZONE_PS_INVAL;
         zone->zone_ncpus = 0;

@@ -5343,11 +5175,10 @@
  * before returning.
  */
 static int
 zone_empty(zone_t *zone)
 {
-        int cnt = 0;
         int waitstatus;
 
         /*
          * We need to drop zonehash_lock before killing all
          * processes, otherwise we'll deadlock with zone_find_*

@@ -5354,21 +5185,12 @@
          * which can be called from the exit path.
          */
         ASSERT(MUTEX_NOT_HELD(&zonehash_lock));
         while ((waitstatus = zone_status_timedwait_sig(zone,
             ddi_get_lbolt() + hz, ZONE_IS_EMPTY)) == -1) {
-                boolean_t force = B_FALSE;
-
-                /* Every 30 seconds, try harder */
-                if (cnt++ >= 30) {
-                        cmn_err(CE_WARN, "attempt to force kill zone %d\n",
-                            zone->zone_id);
-                        force = B_TRUE;
-                        cnt = 0;
-                }
-                killall(zone->zone_id, force);
+                killall(zone->zone_id);
         }
         /*
          * return EINTR if we were signaled
          */
         if (waitstatus == 0)
                 return (EINTR);

@@ -6168,18 +5990,10 @@
                         if (error == 0 && copyout(zbuf, buf, bufsize) != 0)
                                 error = EFAULT;
                 }
                 kmem_free(zbuf, bufsize);
                 break;
-        case ZONE_ATTR_DID:
-                size = sizeof (zoneid_t);
-                if (bufsize > size)
-                        bufsize = size;
-
-                if (buf != NULL && copyout(&zone->zone_did, buf, bufsize) != 0)
-                        error = EFAULT;
-                break;
         case ZONE_ATTR_SCHED_FIXEDHI:
                 size = sizeof (boolean_t);
                 if (bufsize > size)
                         bufsize = size;
 

@@ -7008,11 +6822,10 @@
                             (int *)(unsigned long)zs32.extended_error;
                         zs.match = zs32.match;
                         zs.doi = zs32.doi;
                         zs.label = (const bslabel_t *)(uintptr_t)zs32.label;
                         zs.flags = zs32.flags;
-                        zs.zoneid = zs32.zoneid;
 #else
                         panic("get_udatamodel() returned bogus result\n");
 #endif
                 }
 

@@ -7019,11 +6832,11 @@
                 return (zone_create(zs.zone_name, zs.zone_root,
                     zs.zone_privs, zs.zone_privssz,
                     (caddr_t)zs.rctlbuf, zs.rctlbufsz,
                     (caddr_t)zs.zfsbuf, zs.zfsbufsz,
                     zs.extended_error, zs.match, zs.doi,
-                    zs.label, zs.flags, zs.zoneid));
+                    zs.label, zs.flags));
         case ZONE_BOOT:
                 return (zone_boot((zoneid_t)(uintptr_t)arg1));
         case ZONE_DESTROY:
                 return (zone_destroy((zoneid_t)(uintptr_t)arg1));
         case ZONE_GETATTR:

@@ -7298,11 +7111,11 @@
         /*
          * Kill everyone now and call zoneadmd later.
          * zone_ki_call_zoneadmd() will do a more thorough job of this
          * later.
          */
-        killall(zone->zone_id, B_FALSE);
+        killall(zone->zone_id);
         /*
          * Now, create the thread to contact zoneadmd and do the rest of the
          * work.  This thread can't be created in our zone otherwise
          * zone_destroy() would deadlock.
          */