Print this page


Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/os/zone.c
          +++ new/usr/src/uts/common/os/zone.c
↓ open down ↓ 367 lines elided ↑ open up ↑
 368  368  
 369  369  /*
 370  370   * This isn't static so lint doesn't complain.
 371  371   */
 372  372  rctl_hndl_t rc_zone_cpu_shares;
 373  373  rctl_hndl_t rc_zone_locked_mem;
 374  374  rctl_hndl_t rc_zone_max_swap;
 375  375  rctl_hndl_t rc_zone_phys_mem;
 376  376  rctl_hndl_t rc_zone_max_lofi;
 377  377  rctl_hndl_t rc_zone_cpu_cap;
 378      -rctl_hndl_t rc_zone_cpu_baseline;
 379      -rctl_hndl_t rc_zone_cpu_burst_time;
 380  378  rctl_hndl_t rc_zone_zfs_io_pri;
 381  379  rctl_hndl_t rc_zone_nlwps;
 382  380  rctl_hndl_t rc_zone_nprocs;
 383  381  rctl_hndl_t rc_zone_shmmax;
 384  382  rctl_hndl_t rc_zone_shmmni;
 385  383  rctl_hndl_t rc_zone_semmni;
 386  384  rctl_hndl_t rc_zone_msgmni;
 387  385  
 388  386  const char * const zone_default_initname = "/sbin/init";
 389  387  static char * const zone_prefix = "/zone/";
↓ open down ↓ 26 lines elided ↑ open up ↑
 416  414   * Version 2 alters the zone_create system call in order to support more
 417  415   *     arguments by moving the args into a structure; and to do better
 418  416   *     error reporting when zone_create() fails.
 419  417   * Version 3 alters the zone_create system call in order to support the
 420  418   *     import of ZFS datasets to zones.
 421  419   * Version 4 alters the zone_create system call in order to support
 422  420   *     Trusted Extensions.
 423  421   * Version 5 alters the zone_boot system call, and converts its old
 424  422   *     bootargs parameter to be set by the zone_setattr API instead.
 425  423   * Version 6 adds the flag argument to zone_create.
 426      - * Version 7 adds the requested zoneid to zone_create.
 427  424   */
 428      -static const int ZONE_SYSCALL_API_VERSION = 7;
      425 +static const int ZONE_SYSCALL_API_VERSION = 6;
 429  426  
 430  427  /*
 431  428   * Certain filesystems (such as NFS and autofs) need to know which zone
 432  429   * the mount is being placed in.  Because of this, we need to be able to
 433  430   * ensure that a zone isn't in the process of being created/destroyed such
 434  431   * that nfs_mount() thinks it is in the global/NGZ zone, while by the time
 435  432   * it gets added to the list of mounted zones, it ends up on the wrong zone's
 436  433   * mount list. Since a zone can't reside on an NFS file system, we don't
 437  434   * have to worry about the zonepath itself.
 438  435   *
↓ open down ↓ 936 lines elided ↑ open up ↑
1375 1372          return (cpucaps_zone_set(zone, nv));
1376 1373  }
1377 1374  
1378 1375  static rctl_ops_t zone_cpu_cap_ops = {
1379 1376          rcop_no_action,
1380 1377          zone_cpu_cap_get,
1381 1378          zone_cpu_cap_set,
1382 1379          rcop_no_test
1383 1380  };
1384 1381  
1385      -/*ARGSUSED*/
1386      -static rctl_qty_t
1387      -zone_cpu_base_get(rctl_t *rctl, struct proc *p)
1388      -{
1389      -        ASSERT(MUTEX_HELD(&p->p_lock));
1390      -        return (cpucaps_zone_get_base(p->p_zone));
1391      -}
1392      -
1393 1382  /*
1394      - * The zone cpu base is used to set the baseline CPU for the zone
1395      - * so we can track when the zone is bursting.
1396      - */
1397      -/*ARGSUSED*/
1398      -static int
1399      -zone_cpu_base_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e,
1400      -    rctl_qty_t nv)
1401      -{
1402      -        zone_t *zone = e->rcep_p.zone;
1403      -
1404      -        ASSERT(MUTEX_HELD(&p->p_lock));
1405      -        ASSERT(e->rcep_t == RCENTITY_ZONE);
1406      -
1407      -        if (zone == NULL)
1408      -                return (0);
1409      -
1410      -        return (cpucaps_zone_set_base(zone, nv));
1411      -}
1412      -
1413      -static rctl_ops_t zone_cpu_base_ops = {
1414      -        rcop_no_action,
1415      -        zone_cpu_base_get,
1416      -        zone_cpu_base_set,
1417      -        rcop_no_test
1418      -};
1419      -
1420      -/*ARGSUSED*/
1421      -static rctl_qty_t
1422      -zone_cpu_burst_time_get(rctl_t *rctl, struct proc *p)
1423      -{
1424      -        ASSERT(MUTEX_HELD(&p->p_lock));
1425      -        return (cpucaps_zone_get_burst_time(p->p_zone));
1426      -}
1427      -
1428      -/*
1429      - * The zone cpu burst time is used to set the amount of time CPU(s) can be
1430      - * bursting for the zone.
1431      - */
1432      -/*ARGSUSED*/
1433      -static int
1434      -zone_cpu_burst_time_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e,
1435      -    rctl_qty_t nv)
1436      -{
1437      -        zone_t *zone = e->rcep_p.zone;
1438      -
1439      -        ASSERT(MUTEX_HELD(&p->p_lock));
1440      -        ASSERT(e->rcep_t == RCENTITY_ZONE);
1441      -
1442      -        if (zone == NULL)
1443      -                return (0);
1444      -
1445      -        return (cpucaps_zone_set_burst_time(zone, nv));
1446      -}
1447      -
1448      -static rctl_ops_t zone_cpu_burst_time_ops = {
1449      -        rcop_no_action,
1450      -        zone_cpu_burst_time_get,
1451      -        zone_cpu_burst_time_set,
1452      -        rcop_no_test
1453      -};
1454      -
1455      -/*
1456 1383   * zone.zfs-io-pri resource control support (IO priority).
1457 1384   */
1458 1385  /*ARGSUSED*/
1459 1386  static rctl_qty_t
1460 1387  zone_zfs_io_pri_get(rctl_t *rctl, struct proc *p)
1461 1388  {
1462 1389          ASSERT(MUTEX_HELD(&p->p_lock));
1463 1390          return (p->p_zone->zone_zfs_io_pri);
1464 1391  }
1465 1392  
↓ open down ↓ 513 lines elided ↑ open up ↑
1979 1906  
1980 1907          if (rw == KSTAT_WRITE)
1981 1908                  return (EACCES);
1982 1909  
1983 1910          zk->zk_usage.value.ui64 = zone->zone_max_swap;
1984 1911          zk->zk_value.value.ui64 = zone->zone_max_swap_ctl;
1985 1912          return (0);
1986 1913  }
1987 1914  
1988 1915  static kstat_t *
1989      -zone_rctl_kstat_create_common(zone_t *zone, char *name,
     1916 +zone_kstat_create_common(zone_t *zone, char *name,
1990 1917      int (*updatefunc) (kstat_t *, int))
1991 1918  {
1992 1919          kstat_t *ksp;
1993 1920          zone_kstat_t *zk;
1994 1921  
1995 1922          ksp = rctl_kstat_create_zone(zone, name, KSTAT_TYPE_NAMED,
1996 1923              sizeof (zone_kstat_t) / sizeof (kstat_named_t),
1997 1924              KSTAT_FLAG_VIRTUAL);
1998 1925  
1999 1926          if (ksp == NULL)
↓ open down ↓ 208 lines elided ↑ open up ↑
2208 2135                  kstat_zone_add(ksp, GLOBAL_ZONEID);
2209 2136  
2210 2137          zmp = ksp->ks_data = kmem_zalloc(sizeof (zone_mcap_kstat_t), KM_SLEEP);
2211 2138          ksp->ks_data_size += strlen(zone->zone_name) + 1;
2212 2139          ksp->ks_lock = &zone->zone_mcap_lock;
2213 2140          zone->zone_mcap_stats = zmp;
2214 2141  
2215 2142          /* The kstat "name" field is not large enough for a full zonename */
2216 2143          kstat_named_init(&zmp->zm_zonename, "zonename", KSTAT_DATA_STRING);
2217 2144          kstat_named_setstr(&zmp->zm_zonename, zone->zone_name);
     2145 +        kstat_named_setstr(&zmp->zm_zonename, zone->zone_name);
2218 2146          kstat_named_init(&zmp->zm_rss, "rss", KSTAT_DATA_UINT64);
2219 2147          kstat_named_init(&zmp->zm_phys_cap, "physcap", KSTAT_DATA_UINT64);
2220 2148          kstat_named_init(&zmp->zm_swap, "swap", KSTAT_DATA_UINT64);
2221 2149          kstat_named_init(&zmp->zm_swap_cap, "swapcap", KSTAT_DATA_UINT64);
2222 2150          kstat_named_init(&zmp->zm_nover, "nover", KSTAT_DATA_UINT64);
2223 2151          kstat_named_init(&zmp->zm_pagedout, "pagedout", KSTAT_DATA_UINT64);
2224 2152          kstat_named_init(&zmp->zm_pgpgin, "pgpgin", KSTAT_DATA_UINT64);
2225 2153          kstat_named_init(&zmp->zm_anonpgin, "anonpgin", KSTAT_DATA_UINT64);
2226 2154          kstat_named_init(&zmp->zm_execpgin, "execpgin", KSTAT_DATA_UINT64);
2227 2155          kstat_named_init(&zmp->zm_fspgin, "fspgin", KSTAT_DATA_UINT64);
↓ open down ↓ 33 lines elided ↑ open up ↑
2261 2189  
2262 2190          zmp->zm_avenrun1.value.ui32 = zone->zone_avenrun[0];
2263 2191          zmp->zm_avenrun5.value.ui32 = zone->zone_avenrun[1];
2264 2192          zmp->zm_avenrun15.value.ui32 = zone->zone_avenrun[2];
2265 2193  
2266 2194          zmp->zm_ffcap.value.ui32 = zone->zone_ffcap;
2267 2195          zmp->zm_ffnoproc.value.ui32 = zone->zone_ffnoproc;
2268 2196          zmp->zm_ffnomem.value.ui32 = zone->zone_ffnomem;
2269 2197          zmp->zm_ffmisc.value.ui32 = zone->zone_ffmisc;
2270 2198  
2271      -        zmp->zm_mfseglim.value.ui32 = zone->zone_mfseglim;
2272      -
2273 2199          zmp->zm_nested_intp.value.ui32 = zone->zone_nested_intp;
2274 2200  
2275 2201          zmp->zm_init_pid.value.ui32 = zone->zone_proc_initpid;
2276 2202          zmp->zm_boot_time.value.ui64 = (uint64_t)zone->zone_boot_time;
2277 2203  
2278 2204          return (0);
2279 2205  }
2280 2206  
2281 2207  static kstat_t *
2282 2208  zone_misc_kstat_create(zone_t *zone)
↓ open down ↓ 23 lines elided ↑ open up ↑
2306 2232          kstat_named_init(&zmp->zm_wtime, "nsec_waitrq", KSTAT_DATA_UINT64);
2307 2233          kstat_named_init(&zmp->zm_avenrun1, "avenrun_1min", KSTAT_DATA_UINT32);
2308 2234          kstat_named_init(&zmp->zm_avenrun5, "avenrun_5min", KSTAT_DATA_UINT32);
2309 2235          kstat_named_init(&zmp->zm_avenrun15, "avenrun_15min",
2310 2236              KSTAT_DATA_UINT32);
2311 2237          kstat_named_init(&zmp->zm_ffcap, "forkfail_cap", KSTAT_DATA_UINT32);
2312 2238          kstat_named_init(&zmp->zm_ffnoproc, "forkfail_noproc",
2313 2239              KSTAT_DATA_UINT32);
2314 2240          kstat_named_init(&zmp->zm_ffnomem, "forkfail_nomem", KSTAT_DATA_UINT32);
2315 2241          kstat_named_init(&zmp->zm_ffmisc, "forkfail_misc", KSTAT_DATA_UINT32);
2316      -        kstat_named_init(&zmp->zm_mfseglim, "mapfail_seglim",
2317      -            KSTAT_DATA_UINT32);
2318 2242          kstat_named_init(&zmp->zm_nested_intp, "nested_interp",
2319 2243              KSTAT_DATA_UINT32);
2320 2244          kstat_named_init(&zmp->zm_init_pid, "init_pid", KSTAT_DATA_UINT32);
2321 2245          kstat_named_init(&zmp->zm_boot_time, "boot_time", KSTAT_DATA_UINT64);
2322 2246  
2323 2247          ksp->ks_update = zone_misc_kstat_update;
2324 2248          ksp->ks_private = zone;
2325 2249  
2326 2250          kstat_install(ksp);
2327 2251          return (ksp);
2328 2252  }
2329 2253  
2330 2254  static void
2331 2255  zone_kstat_create(zone_t *zone)
2332 2256  {
2333      -        zone->zone_lockedmem_kstat = zone_rctl_kstat_create_common(zone,
     2257 +        zone->zone_lockedmem_kstat = zone_kstat_create_common(zone,
2334 2258              "lockedmem", zone_lockedmem_kstat_update);
2335      -        zone->zone_swapresv_kstat = zone_rctl_kstat_create_common(zone,
     2259 +        zone->zone_swapresv_kstat = zone_kstat_create_common(zone,
2336 2260              "swapresv", zone_swapresv_kstat_update);
2337      -        zone->zone_physmem_kstat = zone_rctl_kstat_create_common(zone,
     2261 +        zone->zone_physmem_kstat = zone_kstat_create_common(zone,
2338 2262              "physicalmem", zone_physmem_kstat_update);
2339      -        zone->zone_nprocs_kstat = zone_rctl_kstat_create_common(zone,
     2263 +        zone->zone_nprocs_kstat = zone_kstat_create_common(zone,
2340 2264              "nprocs", zone_nprocs_kstat_update);
2341 2265  
2342 2266          if ((zone->zone_vfs_ksp = zone_vfs_kstat_create(zone)) == NULL) {
2343 2267                  zone->zone_vfs_stats = kmem_zalloc(
2344 2268                      sizeof (zone_vfs_kstat_t), KM_SLEEP);
2345 2269          }
2346 2270  
2347      -        if ((zone->zone_zfs_ksp = zone_zfs_kstat_create(zone)) == NULL) {
2348      -                zone->zone_zfs_stats = kmem_zalloc(
2349      -                    sizeof (zone_zfs_kstat_t), KM_SLEEP);
2350      -        }
2351      -
2352 2271          if ((zone->zone_mcap_ksp = zone_mcap_kstat_create(zone)) == NULL) {
2353 2272                  zone->zone_mcap_stats = kmem_zalloc(
2354 2273                      sizeof (zone_mcap_kstat_t), KM_SLEEP);
2355 2274          }
2356 2275  
2357 2276          if ((zone->zone_misc_ksp = zone_misc_kstat_create(zone)) == NULL) {
2358 2277                  zone->zone_misc_stats = kmem_zalloc(
2359 2278                      sizeof (zone_misc_kstat_t), KM_SLEEP);
2360 2279          }
     2280 +
2361 2281  }
2362 2282  
2363 2283  static void
2364 2284  zone_kstat_delete_common(kstat_t **pkstat, size_t datasz)
2365 2285  {
2366 2286          void *data;
2367 2287  
2368 2288          if (*pkstat != NULL) {
2369 2289                  data = (*pkstat)->ks_data;
2370 2290                  kstat_delete(*pkstat);
↓ open down ↓ 9 lines elided ↑ open up ↑
2380 2300              sizeof (zone_kstat_t));
2381 2301          zone_kstat_delete_common(&zone->zone_swapresv_kstat,
2382 2302              sizeof (zone_kstat_t));
2383 2303          zone_kstat_delete_common(&zone->zone_physmem_kstat,
2384 2304              sizeof (zone_kstat_t));
2385 2305          zone_kstat_delete_common(&zone->zone_nprocs_kstat,
2386 2306              sizeof (zone_kstat_t));
2387 2307  
2388 2308          zone_kstat_delete_common(&zone->zone_vfs_ksp,
2389 2309              sizeof (zone_vfs_kstat_t));
2390      -        zone_kstat_delete_common(&zone->zone_zfs_ksp,
2391      -            sizeof (zone_zfs_kstat_t));
2392 2310          zone_kstat_delete_common(&zone->zone_mcap_ksp,
2393 2311              sizeof (zone_mcap_kstat_t));
2394 2312          zone_kstat_delete_common(&zone->zone_misc_ksp,
2395 2313              sizeof (zone_misc_kstat_t));
     2314 +
2396 2315  }
2397 2316  
2398 2317  /*
2399 2318   * Called very early on in boot to initialize the ZSD list so that
2400 2319   * zone_key_create() can be called before zone_init().  It also initializes
2401 2320   * portions of zone0 which may be used before zone_init() is called.  The
2402 2321   * variable "global_zone" will be set when zone0 is fully initialized by
2403 2322   * zone_init().
2404 2323   */
2405 2324  void
↓ open down ↓ 41 lines elided ↑ open up ↑
2447 2366          zone0.zone_psetid = ZONE_PS_INVAL;
2448 2367          zone0.zone_ncpus = 0;
2449 2368          zone0.zone_ncpus_online = 0;
2450 2369          zone0.zone_proc_initpid = 1;
2451 2370          zone0.zone_initname = initname;
2452 2371          zone0.zone_lockedmem_kstat = NULL;
2453 2372          zone0.zone_swapresv_kstat = NULL;
2454 2373          zone0.zone_physmem_kstat = NULL;
2455 2374          zone0.zone_nprocs_kstat = NULL;
2456 2375          zone0.zone_zfs_io_pri = 1;
     2376 +
2457 2377          zone0.zone_stime = 0;
2458 2378          zone0.zone_utime = 0;
2459 2379          zone0.zone_wtime = 0;
2460 2380  
2461 2381          list_create(&zone0.zone_ref_list, sizeof (zone_ref_t),
2462 2382              offsetof(zone_ref_t, zref_linkage));
2463 2383          list_create(&zone0.zone_zsd, sizeof (struct zsd_entry),
2464 2384              offsetof(struct zsd_entry, zsd_linkage));
2465 2385          list_insert_head(&zone_active, &zone0);
2466 2386  
↓ open down ↓ 90 lines elided ↑ open up ↑
2557 2477              RCENTITY_ZONE, RCTL_GLOBAL_SIGNAL_NEVER | RCTL_GLOBAL_DENY_NEVER |
2558 2478              RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT | RCTL_GLOBAL_SYSLOG_NEVER,
2559 2479              FSS_MAXSHARES, FSS_MAXSHARES, &zone_cpu_shares_ops);
2560 2480  
2561 2481          rc_zone_cpu_cap = rctl_register("zone.cpu-cap",
2562 2482              RCENTITY_ZONE, RCTL_GLOBAL_SIGNAL_NEVER | RCTL_GLOBAL_DENY_ALWAYS |
2563 2483              RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT |RCTL_GLOBAL_SYSLOG_NEVER |
2564 2484              RCTL_GLOBAL_INFINITE,
2565 2485              MAXCAP, MAXCAP, &zone_cpu_cap_ops);
2566 2486  
2567      -        rc_zone_cpu_baseline = rctl_register("zone.cpu-baseline",
2568      -            RCENTITY_ZONE, RCTL_GLOBAL_SIGNAL_NEVER | RCTL_GLOBAL_DENY_NEVER |
2569      -            RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT | RCTL_GLOBAL_SYSLOG_NEVER,
2570      -            MAXCAP, MAXCAP, &zone_cpu_base_ops);
2571      -
2572      -        rc_zone_cpu_burst_time = rctl_register("zone.cpu-burst-time",
2573      -            RCENTITY_ZONE, RCTL_GLOBAL_SIGNAL_NEVER | RCTL_GLOBAL_DENY_NEVER |
2574      -            RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT | RCTL_GLOBAL_SYSLOG_NEVER,
2575      -            INT_MAX, INT_MAX, &zone_cpu_burst_time_ops);
2576      -
2577 2487          rc_zone_zfs_io_pri = rctl_register("zone.zfs-io-priority",
2578 2488              RCENTITY_ZONE, RCTL_GLOBAL_SIGNAL_NEVER | RCTL_GLOBAL_DENY_NEVER |
2579 2489              RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT | RCTL_GLOBAL_SYSLOG_NEVER,
2580 2490              16384, 16384, &zone_zfs_io_pri_ops);
2581 2491  
2582 2492          rc_zone_nlwps = rctl_register("zone.max-lwps", RCENTITY_ZONE,
2583 2493              RCTL_GLOBAL_NOACTION | RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT,
2584 2494              INT_MAX, INT_MAX, &zone_lwps_ops);
2585 2495  
2586 2496          rc_zone_nprocs = rctl_register("zone.max-processes", RCENTITY_ZONE,
↓ open down ↓ 26 lines elided ↑ open up ↑
2613 2523          dval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
2614 2524          bzero(dval, sizeof (rctl_val_t));
2615 2525          dval->rcv_value = 1;
2616 2526          dval->rcv_privilege = RCPRIV_PRIVILEGED;
2617 2527          dval->rcv_flagaction = RCTL_LOCAL_NOACTION;
2618 2528          dval->rcv_action_recip_pid = -1;
2619 2529  
2620 2530          rde = rctl_dict_lookup("zone.cpu-shares");
2621 2531          (void) rctl_val_list_insert(&rde->rcd_default_value, dval);
2622 2532  
2623      -        /*
2624      -         * Create a rctl_val with PRIVILEGED, NOACTION, value = 1.  Then attach
2625      -         * this at the head of the rctl_dict_entry for ``zone.zfs-io-priority'.
2626      -         */
2627      -        dval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
2628      -        bzero(dval, sizeof (rctl_val_t));
2629      -        dval->rcv_value = 1;
2630      -        dval->rcv_privilege = RCPRIV_PRIVILEGED;
2631      -        dval->rcv_flagaction = RCTL_LOCAL_NOACTION;
2632      -        dval->rcv_action_recip_pid = -1;
2633      -
2634      -        rde = rctl_dict_lookup("zone.zfs-io-priority");
2635      -        (void) rctl_val_list_insert(&rde->rcd_default_value, dval);
2636      -
2637 2533          rc_zone_locked_mem = rctl_register("zone.max-locked-memory",
2638 2534              RCENTITY_ZONE, RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_BYTES |
2639 2535              RCTL_GLOBAL_DENY_ALWAYS, UINT64_MAX, UINT64_MAX,
2640 2536              &zone_locked_mem_ops);
2641 2537  
2642 2538          rc_zone_max_swap = rctl_register("zone.max-swap",
2643 2539              RCENTITY_ZONE, RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_BYTES |
2644 2540              RCTL_GLOBAL_DENY_ALWAYS, UINT64_MAX, UINT64_MAX,
2645 2541              &zone_max_swap_ops);
2646 2542  
↓ open down ↓ 184 lines elided ↑ open up ↑
2831 2727  /*
2832 2728   * See block comment at the top of this file for information about zone
2833 2729   * status values.
2834 2730   */
2835 2731  /*
2836 2732   * Convenience function for setting zone status.
2837 2733   */
2838 2734  static void
2839 2735  zone_status_set(zone_t *zone, zone_status_t status)
2840 2736  {
2841      -        timestruc_t now;
2842      -        uint64_t t;
2843 2737  
2844 2738          nvlist_t *nvl = NULL;
2845 2739          ASSERT(MUTEX_HELD(&zone_status_lock));
2846 2740          ASSERT(status > ZONE_MIN_STATE && status <= ZONE_MAX_STATE &&
2847 2741              status >= zone_status_get(zone));
2848 2742  
2849      -        /* Current time since Jan 1 1970 but consumers expect NS */
2850      -        gethrestime(&now);
2851      -        t = (now.tv_sec * NANOSEC) + now.tv_nsec;
2852      -
2853 2743          if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) ||
2854 2744              nvlist_add_string(nvl, ZONE_CB_NAME, zone->zone_name) ||
2855 2745              nvlist_add_string(nvl, ZONE_CB_NEWSTATE,
2856 2746              zone_status_table[status]) ||
2857 2747              nvlist_add_string(nvl, ZONE_CB_OLDSTATE,
2858 2748              zone_status_table[zone->zone_status]) ||
2859 2749              nvlist_add_int32(nvl, ZONE_CB_ZONEID, zone->zone_id) ||
2860      -            nvlist_add_uint64(nvl, ZONE_CB_TIMESTAMP, t) ||
     2750 +            nvlist_add_uint64(nvl, ZONE_CB_TIMESTAMP, (uint64_t)gethrtime()) ||
2861 2751              sysevent_evc_publish(zone_event_chan, ZONE_EVENT_STATUS_CLASS,
2862 2752              ZONE_EVENT_STATUS_SUBCLASS, "sun.com", "kernel", nvl, EVCH_SLEEP)) {
2863 2753  #ifdef DEBUG
2864 2754                  (void) printf(
2865 2755                      "Failed to allocate and send zone state change event.\n");
2866 2756  #endif
2867 2757          }
2868 2758          nvlist_free(nvl);
2869 2759  
2870 2760          zone->zone_status = status;
↓ open down ↓ 580 lines elided ↑ open up ↑
3451 3341          mutex_exit(&zone_status_lock);
3452 3342          zone_rele(zone);
3453 3343  }
3454 3344  
3455 3345  zoneid_t
3456 3346  getzoneid(void)
3457 3347  {
3458 3348          return (curproc->p_zone->zone_id);
3459 3349  }
3460 3350  
3461      -zoneid_t
3462      -getzonedid(void)
3463      -{
3464      -        return (curproc->p_zone->zone_did);
3465      -}
3466      -
3467 3351  /*
3468 3352   * Internal versions of zone_find_by_*().  These don't zone_hold() or
3469 3353   * check the validity of a zone's state.
3470 3354   */
3471 3355  static zone_t *
3472 3356  zone_find_all_by_id(zoneid_t zoneid)
3473 3357  {
3474 3358          mod_hash_val_t hv;
3475 3359          zone_t *zone = NULL;
3476 3360  
↓ open down ↓ 1324 lines elided ↑ open up ↑
4801 4685          error = 0;
4802 4686          *nvlp = nvl;
4803 4687  out:
4804 4688          kmem_free(kbuf, buflen);
4805 4689          if (error && nvl != NULL)
4806 4690                  nvlist_free(nvl);
4807 4691          return (error);
4808 4692  }
4809 4693  
4810 4694  int
4811      -zone_create_error(int er_error, int er_ext, int *er_out)
4812      -{
     4695 +zone_create_error(int er_error, int er_ext, int *er_out) {
4813 4696          if (er_out != NULL) {
4814 4697                  if (copyout(&er_ext, er_out, sizeof (int))) {
4815 4698                          return (set_errno(EFAULT));
4816 4699                  }
4817 4700          }
4818 4701          return (set_errno(er_error));
4819 4702  }
4820 4703  
4821 4704  static int
4822 4705  zone_set_label(zone_t *zone, const bslabel_t *lab, uint32_t doi)
↓ open down ↓ 69 lines elided ↑ open up ↑
4892 4775   *
4893 4776   * If extended error is non-null, we may use it to return more detailed
4894 4777   * error information.
4895 4778   */
4896 4779  static zoneid_t
4897 4780  zone_create(const char *zone_name, const char *zone_root,
4898 4781      const priv_set_t *zone_privs, size_t zone_privssz,
4899 4782      caddr_t rctlbuf, size_t rctlbufsz,
4900 4783      caddr_t zfsbuf, size_t zfsbufsz, int *extended_error,
4901 4784      int match, uint32_t doi, const bslabel_t *label,
4902      -    int flags, zoneid_t zone_did)
     4785 +    int flags)
4903 4786  {
4904 4787          struct zsched_arg zarg;
4905 4788          nvlist_t *rctls = NULL;
4906 4789          proc_t *pp = curproc;
4907 4790          zone_t *zone, *ztmp;
4908      -        zoneid_t zoneid, start = GLOBAL_ZONEID;
     4791 +        zoneid_t zoneid;
4909 4792          int error;
4910 4793          int error2 = 0;
4911 4794          char *str;
4912 4795          cred_t *zkcr;
4913 4796          boolean_t insert_label_hash;
4914 4797  
4915 4798          if (secpolicy_zone_config(CRED()) != 0)
4916 4799                  return (set_errno(EPERM));
4917 4800  
4918 4801          /* can't boot zone from within chroot environment */
4919 4802          if (PTOU(pp)->u_rdir != NULL && PTOU(pp)->u_rdir != rootdir)
4920 4803                  return (zone_create_error(ENOTSUP, ZE_CHROOTED,
4921 4804                      extended_error));
4922 4805  
4923      -        /*
4924      -         * As the first step of zone creation, we want to allocate a zoneid.
4925      -         * This allocation is complicated by the fact that netstacks use the
4926      -         * zoneid to determine their stackid, but netstacks themselves are
4927      -         * freed asynchronously with respect to zone destruction.  This means
4928      -         * that a netstack reference leak (or in principle, an extraordinarily
4929      -         * long netstack reference hold) could result in a zoneid being
4930      -         * allocated that in fact corresponds to a stackid from an active
4931      -         * (referenced) netstack -- unleashing all sorts of havoc when that
4932      -         * netstack is actually (re)used.  (In the abstract, we might wish a
4933      -         * zoneid to not be deallocated until its last referencing netstack
4934      -         * has been released, but netstacks lack a backpointer into their
4935      -         * referencing zone -- and changing them to have such a pointer would
4936      -         * be substantial, to put it euphemistically.)  To avoid this, we
4937      -         * detect this condition on allocation: if we have allocated a zoneid
4938      -         * that corresponds to a netstack that's still in use, we warn about
4939      -         * it (as it is much more likely to be a reference leak than an actual
4940      -         * netstack reference), free it, and allocate another.  That these
4941      -         * identifiers are allocated out of an ID space assures that we won't
4942      -         * see the identifier we just allocated.
4943      -         */
4944      -        for (;;) {
4945      -                zoneid = id_alloc(zoneid_space);
4946      -
4947      -                if (!netstack_inuse_by_stackid(zoneid_to_netstackid(zoneid)))
4948      -                        break;
4949      -
4950      -                id_free(zoneid_space, zoneid);
4951      -
4952      -                if (start == GLOBAL_ZONEID) {
4953      -                        start = zoneid;
4954      -                } else if (zoneid == start) {
4955      -                        /*
4956      -                         * We have managed to iterate over the entire available
4957      -                         * zoneid space -- there are no identifiers available,
4958      -                         * presumably due to some number of leaked netstack
4959      -                         * references.  While it's in principle possible for us
4960      -                         * to continue to try, it seems wiser to give up at
4961      -                         * this point to warn and fail explicitly with a
4962      -                         * distinctive error.
4963      -                         */
4964      -                        cmn_err(CE_WARN, "zone_create() failed: all available "
4965      -                            "zone IDs have netstacks still in use");
4966      -                        return (set_errno(ENFILE));
4967      -                }
4968      -
4969      -                cmn_err(CE_WARN, "unable to reuse zone ID %d; "
4970      -                    "netstack still in use", zoneid);
4971      -        }
4972      -
4973 4806          zone = kmem_zalloc(sizeof (zone_t), KM_SLEEP);
4974      -        zone->zone_id = zoneid;
4975      -        zone->zone_did = zone_did;
     4807 +        zoneid = zone->zone_id = id_alloc(zoneid_space);
4976 4808          zone->zone_status = ZONE_IS_UNINITIALIZED;
4977 4809          zone->zone_pool = pool_default;
4978 4810          zone->zone_pool_mod = gethrtime();
4979 4811          zone->zone_psetid = ZONE_PS_INVAL;
4980 4812          zone->zone_ncpus = 0;
4981 4813          zone->zone_ncpus_online = 0;
4982 4814          zone->zone_restart_init = B_TRUE;
4983 4815          zone->zone_reboot_on_init_exit = B_FALSE;
4984 4816          zone->zone_init_status = -1;
4985 4817          zone->zone_brand = &native_brand;
↓ open down ↓ 352 lines elided ↑ open up ↑
5338 5170          return (err ? set_errno(err) : 0);
5339 5171  }
5340 5172  
5341 5173  /*
5342 5174   * Kills all user processes in the zone, waiting for them all to exit
5343 5175   * before returning.
5344 5176   */
5345 5177  static int
5346 5178  zone_empty(zone_t *zone)
5347 5179  {
5348      -        int cnt = 0;
5349 5180          int waitstatus;
5350 5181  
5351 5182          /*
5352 5183           * We need to drop zonehash_lock before killing all
5353 5184           * processes, otherwise we'll deadlock with zone_find_*
5354 5185           * which can be called from the exit path.
5355 5186           */
5356 5187          ASSERT(MUTEX_NOT_HELD(&zonehash_lock));
5357 5188          while ((waitstatus = zone_status_timedwait_sig(zone,
5358 5189              ddi_get_lbolt() + hz, ZONE_IS_EMPTY)) == -1) {
5359      -                boolean_t force = B_FALSE;
5360      -
5361      -                /* Every 30 seconds, try harder */
5362      -                if (cnt++ >= 30) {
5363      -                        cmn_err(CE_WARN, "attempt to force kill zone %d\n",
5364      -                            zone->zone_id);
5365      -                        force = B_TRUE;
5366      -                        cnt = 0;
5367      -                }
5368      -                killall(zone->zone_id, force);
     5190 +                killall(zone->zone_id);
5369 5191          }
5370 5192          /*
5371 5193           * return EINTR if we were signaled
5372 5194           */
5373 5195          if (waitstatus == 0)
5374 5196                  return (EINTR);
5375 5197          return (0);
5376 5198  }
5377 5199  
5378 5200  /*
↓ open down ↓ 784 lines elided ↑ open up ↑
6163 5985                  zbuf = kmem_alloc(bufsize, KM_SLEEP);
6164 5986                  if (copyin(buf, zbuf, bufsize) != 0) {
6165 5987                          error = EFAULT;
6166 5988                  } else {
6167 5989                          error = zone_get_network(zoneid, zbuf);
6168 5990                          if (error == 0 && copyout(zbuf, buf, bufsize) != 0)
6169 5991                                  error = EFAULT;
6170 5992                  }
6171 5993                  kmem_free(zbuf, bufsize);
6172 5994                  break;
6173      -        case ZONE_ATTR_DID:
6174      -                size = sizeof (zoneid_t);
6175      -                if (bufsize > size)
6176      -                        bufsize = size;
6177      -
6178      -                if (buf != NULL && copyout(&zone->zone_did, buf, bufsize) != 0)
6179      -                        error = EFAULT;
6180      -                break;
6181 5995          case ZONE_ATTR_SCHED_FIXEDHI:
6182 5996                  size = sizeof (boolean_t);
6183 5997                  if (bufsize > size)
6184 5998                          bufsize = size;
6185 5999  
6186 6000                  if (buf != NULL && copyout(&zone->zone_fixed_hipri, buf,
6187 6001                      bufsize) != 0)
6188 6002                          error = EFAULT;
6189 6003                  break;
6190 6004          default:
↓ open down ↓ 812 lines elided ↑ open up ↑
7003 6817                          zs.rctlbuf = (caddr_t)(unsigned long)zs32.rctlbuf;
7004 6818                          zs.rctlbufsz = zs32.rctlbufsz;
7005 6819                          zs.zfsbuf = (caddr_t)(unsigned long)zs32.zfsbuf;
7006 6820                          zs.zfsbufsz = zs32.zfsbufsz;
7007 6821                          zs.extended_error =
7008 6822                              (int *)(unsigned long)zs32.extended_error;
7009 6823                          zs.match = zs32.match;
7010 6824                          zs.doi = zs32.doi;
7011 6825                          zs.label = (const bslabel_t *)(uintptr_t)zs32.label;
7012 6826                          zs.flags = zs32.flags;
7013      -                        zs.zoneid = zs32.zoneid;
7014 6827  #else
7015 6828                          panic("get_udatamodel() returned bogus result\n");
7016 6829  #endif
7017 6830                  }
7018 6831  
7019 6832                  return (zone_create(zs.zone_name, zs.zone_root,
7020 6833                      zs.zone_privs, zs.zone_privssz,
7021 6834                      (caddr_t)zs.rctlbuf, zs.rctlbufsz,
7022 6835                      (caddr_t)zs.zfsbuf, zs.zfsbufsz,
7023 6836                      zs.extended_error, zs.match, zs.doi,
7024      -                    zs.label, zs.flags, zs.zoneid));
     6837 +                    zs.label, zs.flags));
7025 6838          case ZONE_BOOT:
7026 6839                  return (zone_boot((zoneid_t)(uintptr_t)arg1));
7027 6840          case ZONE_DESTROY:
7028 6841                  return (zone_destroy((zoneid_t)(uintptr_t)arg1));
7029 6842          case ZONE_GETATTR:
7030 6843                  return (zone_getattr((zoneid_t)(uintptr_t)arg1,
7031 6844                      (int)(uintptr_t)arg2, arg3, (size_t)arg4));
7032 6845          case ZONE_SETATTR:
7033 6846                  return (zone_setattr((zoneid_t)(uintptr_t)arg1,
7034 6847                      (int)(uintptr_t)arg2, arg3, (size_t)arg4));
↓ open down ↓ 258 lines elided ↑ open up ↑
7293 7106           * Prevent future zone_enter()s
7294 7107           */
7295 7108          zone_status_set(zone, ZONE_IS_SHUTTING_DOWN);
7296 7109          mutex_exit(&zone_status_lock);
7297 7110  
7298 7111          /*
7299 7112           * Kill everyone now and call zoneadmd later.
7300 7113           * zone_ki_call_zoneadmd() will do a more thorough job of this
7301 7114           * later.
7302 7115           */
7303      -        killall(zone->zone_id, B_FALSE);
     7116 +        killall(zone->zone_id);
7304 7117          /*
7305 7118           * Now, create the thread to contact zoneadmd and do the rest of the
7306 7119           * work.  This thread can't be created in our zone otherwise
7307 7120           * zone_destroy() would deadlock.
7308 7121           */
7309 7122          zargp = kmem_zalloc(sizeof (*zargp), KM_SLEEP);
7310 7123          zargp->arg.cmd = zcmd;
7311 7124          zargp->arg.uniqid = zone->zone_uniqid;
7312 7125          zargp->zone = zone;
7313 7126          (void) strcpy(zargp->arg.locale, "C");
↓ open down ↓ 615 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX