145 */
146 uint64_t scale_rss = 0;
147 uint64_t prev_fast_rss = 0;
148 uint64_t fast_rss = 0;
149 uint64_t accurate_rss = 0;
150
151 static char zoneproc[MAXPATHLEN]; /* zone's proc dir; built in create_mcap_thread() */
152 static char debug_log[MAXPATHLEN]; /* "<zonepath>/mcap_debug.log" */
153 static zoneid_t zid; /* zone id handed to create_mcap_thread() */
154 static mutex_t shutdown_mx;
155 static cond_t shutdown_cv;
156 static int shutting_down = 0; /* nonzero ends the mapping pageout loop */
157 static thread_t mcap_tid; /* cap thread id; 0 if thr_create() failed */
158 static FILE *debug_log_fp = NULL;
159 static uint64_t zone_rss_cap; /* zone RSS cap, in KB */
160 static char over_cmd[2 * BUFSIZ]; /* same size as zone_attr_value */
161 static boolean_t skip_vmusage = B_FALSE;
162 static boolean_t skip_pageout = B_FALSE;
163 static boolean_t skip_pf_throttle = B_FALSE;
164
165 static zlog_t *logp; /* log handle handed to create_mcap_thread() */
166
167 static int64_t check_suspend(void); /* forward decl; (void) gives a real prototype */
168 static void get_mcap_tunables(void); /* forward decl; (void) gives a real prototype */
169
170 /*
171 * Cursor over one process's address-space mappings: the mapping array, how
172 * many entries it holds, and which entry is currently being worked on.
173 */
174 typedef struct {
175 int pr_curr; /* the number of the mapping we're working on */
176 int pr_nmap; /* number of mappings in the address space */
177 prmap_t *pr_mapp; /* process's map array; NULL before init_map() */
178 } proc_map_t;
179
180 typedef struct zsd_vmusage64 {
181 id_t vmu_zoneid;
182 uint_t vmu_type;
183 id_t vmu_id;
184 /*
185 * An amd64 kernel will align the following uint64_t members, but a
186 * 32bit i386 process will not without help.
371 if (pmp->pr_mflags & MA_ISM || pmp->pr_mflags & MA_SHM)
372 return (0);
373
374 errno = 0;
375 res = syscall(SYS_rusagesys, _RUSAGESYS_INVALMAP, pid, pmp->pr_vaddr,
376 pmp->pr_size);
377
378 return (res);
379 }
380
381 /*
382 * Work through a process, paging out its mappings until the whole address
383 * space has been examined or the excess is <= 0. Return our estimate of the updated excess.
384 */
385 static int64_t
386 pageout_process(pid_t pid, int64_t excess)
387 {
388 int psfd;
389 prmap_t *pmap;
390 proc_map_t cur;
391 int res;
392 int64_t sum_d_rss, d_rss;
393 int64_t old_rss;
394 int map_cnt;
395 psinfo_t psinfo;
396 char pathbuf[MAXPATHLEN];
397
398 (void) snprintf(pathbuf, sizeof (pathbuf), "%s/%d/psinfo", zoneproc,
399 pid);
400 if ((psfd = open(pathbuf, O_RDONLY, 0000)) < 0)
401 return (excess);
402
403 cur.pr_mapp = NULL;
404
405 if (pread(psfd, &psinfo, sizeof (psinfo), 0) != sizeof (psinfo))
406 goto done;
407
408 old_rss = (int64_t)psinfo.pr_rssize;
409 map_cnt = 0;
410
411 /* If unscannable, skip it. */
423 }
424
425 /* Get segment residency information. */
426 pmap = init_map(&cur, pid);
427
428 /* Skip process if it has no mappings. */
429 if (pmap == NULL) {
430 debug("pid %ld: map unreadable; ignoring\n", pid);
431 goto done;
432 }
433
434 debug("pid %ld: nmap %d sz %dKB rss %lldKB %s\n",
435 pid, cur.pr_nmap, psinfo.pr_size, old_rss, psinfo.pr_psargs);
436
437 /*
438 * Within the process's address space, attempt to page out mappings.
439 */
440 sum_d_rss = 0;
441 while (excess > 0 && pmap != NULL && !shutting_down) {
442 /* invalidate the entire mapping */
443 if ((res = pageout_mapping(pid, pmap)) < 0)
444 debug("pid %ld: mapping 0x%p %ldkb unpageable (%d)\n",
445 pid, pmap->pr_vaddr, pmap->pr_size / 1024, errno);
446
447 map_cnt++;
448
449 /*
450 * Re-check the process rss and get the delta.
451 */
452 if (pread(psfd, &psinfo, sizeof (psinfo), 0)
453 != sizeof (psinfo)) {
454 excess -= old_rss;
455 goto done;
456 }
457
458 d_rss = (int64_t)psinfo.pr_rssize - old_rss;
459 old_rss = (int64_t)psinfo.pr_rssize;
460 sum_d_rss += d_rss;
461
462 /*
463 * d_rss hopefully should be negative (or 0 if nothing
464 * invalidated) but can be positive if more got paged in.
465 */
1139
1140 debug("process pass done; excess %lld\n", (long long)excess);
1141 rewinddir(pdir);
1142
1143 if (skip_pageout)
1144 (void) sleep_shutdown(120);
1145 }
1146
1147 if (pdir != NULL)
1148 (void) closedir(pdir);
1149 debug("thread shutdown\n");
1150 }
1151
1152 void
1153 create_mcap_thread(zlog_t *zlogp, zoneid_t id)
1154 {
1155 int res;
1156
1157 shutting_down = 0;
1158 zid = id;
1159 logp = zlogp;
1160
1161 /* all but the lx brand currently use /proc */
1162 if (strcmp(brand_name, "lx") == 0) {
1163 (void) snprintf(zoneproc, sizeof (zoneproc),
1164 "%s/root/native/proc", zonepath);
1165 } else {
1166 (void) snprintf(zoneproc, sizeof (zoneproc), "%s/root/proc",
1167 zonepath);
1168 }
1169
1170 (void) snprintf(debug_log, sizeof (debug_log), "%s/mcap_debug.log",
1171 zonepath);
1172
1173 res = thr_create(NULL, NULL, (void *(*)(void *))mcap_zone, NULL, NULL,
1174 &mcap_tid);
1175 if (res != 0) {
1176 zerror(zlogp, B_FALSE, "error %d creating memory cap thread",
1177 res);
1178 mcap_tid = 0;
1179 }
|
145 */
146 uint64_t scale_rss = 0;
147 uint64_t prev_fast_rss = 0;
148 uint64_t fast_rss = 0;
149 uint64_t accurate_rss = 0;
150
151 static char zoneproc[MAXPATHLEN]; /* zone's proc dir; built in create_mcap_thread() */
152 static char debug_log[MAXPATHLEN]; /* "<zonepath>/mcap_debug.log" */
153 static zoneid_t zid; /* zone id handed to create_mcap_thread() */
154 static mutex_t shutdown_mx;
155 static cond_t shutdown_cv;
156 static int shutting_down = 0; /* nonzero ends the mapping pageout loop */
157 static thread_t mcap_tid; /* cap thread id; 0 if thr_create() failed */
158 static FILE *debug_log_fp = NULL;
159 static uint64_t zone_rss_cap; /* zone RSS cap, in KB */
160 static char over_cmd[2 * BUFSIZ]; /* same size as zone_attr_value */
161 static boolean_t skip_vmusage = B_FALSE;
162 static boolean_t skip_pageout = B_FALSE;
163 static boolean_t skip_pf_throttle = B_FALSE;
164
165 static int64_t check_suspend(void); /* forward decl; (void) gives a real prototype */
166 static void get_mcap_tunables(void); /* forward decl; (void) gives a real prototype */
167
168 /*
169 * Cursor over one process's address-space mappings: the mapping array, how
170 * many entries it holds, and which entry is currently being worked on.
171 */
172 typedef struct {
173 int pr_curr; /* the number of the mapping we're working on */
174 int pr_nmap; /* number of mappings in the address space */
175 prmap_t *pr_mapp; /* process's map array; NULL before init_map() */
176 } proc_map_t;
177
178 typedef struct zsd_vmusage64 {
179 id_t vmu_zoneid;
180 uint_t vmu_type;
181 id_t vmu_id;
182 /*
183 * An amd64 kernel will align the following uint64_t members, but a
184 * 32bit i386 process will not without help.
369 if (pmp->pr_mflags & MA_ISM || pmp->pr_mflags & MA_SHM)
370 return (0);
371
372 errno = 0;
373 res = syscall(SYS_rusagesys, _RUSAGESYS_INVALMAP, pid, pmp->pr_vaddr,
374 pmp->pr_size);
375
376 return (res);
377 }
378
379 /*
380 * Work through a process, paging out its mappings until the whole address
381 * space has been examined or the excess is <= 0. Return our estimate of the updated excess.
382 */
383 static int64_t
384 pageout_process(pid_t pid, int64_t excess)
385 {
386 int psfd;
387 prmap_t *pmap;
388 proc_map_t cur;
389 int64_t sum_d_rss, d_rss;
390 int64_t old_rss;
391 int map_cnt;
392 psinfo_t psinfo;
393 char pathbuf[MAXPATHLEN];
394
395 (void) snprintf(pathbuf, sizeof (pathbuf), "%s/%d/psinfo", zoneproc,
396 pid);
397 if ((psfd = open(pathbuf, O_RDONLY, 0000)) < 0)
398 return (excess);
399
400 cur.pr_mapp = NULL;
401
402 if (pread(psfd, &psinfo, sizeof (psinfo), 0) != sizeof (psinfo))
403 goto done;
404
405 old_rss = (int64_t)psinfo.pr_rssize;
406 map_cnt = 0;
407
408 /* If unscannable, skip it. */
420 }
421
422 /* Get segment residency information. */
423 pmap = init_map(&cur, pid);
424
425 /* Skip process if it has no mappings. */
426 if (pmap == NULL) {
427 debug("pid %ld: map unreadable; ignoring\n", pid);
428 goto done;
429 }
430
431 debug("pid %ld: nmap %d sz %dKB rss %lldKB %s\n",
432 pid, cur.pr_nmap, psinfo.pr_size, old_rss, psinfo.pr_psargs);
433
434 /*
435 * Within the process's address space, attempt to page out mappings.
436 */
437 sum_d_rss = 0;
438 while (excess > 0 && pmap != NULL && !shutting_down) {
439 /* invalidate the entire mapping */
440 if (pageout_mapping(pid, pmap) < 0)
441 debug("pid %ld: mapping 0x%p %ldkb unpageable (%d)\n",
442 pid, (void *)pmap->pr_vaddr,
443 (long)pmap->pr_size / 1024L, errno);
444
445 map_cnt++;
446
447 /*
448 * Re-check the process rss and get the delta.
449 */
450 if (pread(psfd, &psinfo, sizeof (psinfo), 0)
451 != sizeof (psinfo)) {
452 excess -= old_rss;
453 goto done;
454 }
455
456 d_rss = (int64_t)psinfo.pr_rssize - old_rss;
457 old_rss = (int64_t)psinfo.pr_rssize;
458 sum_d_rss += d_rss;
459
460 /*
461 * d_rss hopefully should be negative (or 0 if nothing
462 * invalidated) but can be positive if more got paged in.
463 */
1137
1138 debug("process pass done; excess %lld\n", (long long)excess);
1139 rewinddir(pdir);
1140
1141 if (skip_pageout)
1142 (void) sleep_shutdown(120);
1143 }
1144
1145 if (pdir != NULL)
1146 (void) closedir(pdir);
1147 debug("thread shutdown\n");
1148 }
1149
1150 void
1151 create_mcap_thread(zlog_t *zlogp, zoneid_t id)
1152 {
1153 int res;
1154
1155 shutting_down = 0;
1156 zid = id;
1157
1158 /* all but the lx brand currently use /proc */
1159 if (strcmp(brand_name, "lx") == 0) {
1160 (void) snprintf(zoneproc, sizeof (zoneproc),
1161 "%s/root/native/proc", zonepath);
1162 } else {
1163 (void) snprintf(zoneproc, sizeof (zoneproc), "%s/root/proc",
1164 zonepath);
1165 }
1166
1167 (void) snprintf(debug_log, sizeof (debug_log), "%s/mcap_debug.log",
1168 zonepath);
1169
1170 res = thr_create(NULL, NULL, (void *(*)(void *))mcap_zone, NULL, NULL,
1171 &mcap_tid);
1172 if (res != 0) {
1173 zerror(zlogp, B_FALSE, "error %d creating memory cap thread",
1174 res);
1175 mcap_tid = 0;
1176 }
|