Print this page
    
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/cmd/rcap/rcapd/rcapd_scanner.c
          +++ new/usr/src/cmd/rcap/rcapd/rcapd_scanner.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
  24   24   * Copyright 2012 Joyent, Inc.  All rights reserved.
  25   25   */
  26   26  
  27   27  #pragma ident   "%Z%%M% %I%     %E% SMI"
  28   28  
  29   29  #include <sys/mman.h>
  30   30  #include <sys/param.h>
  31   31  #include <sys/stat.h>
  32   32  #include <sys/types.h>
  33   33  #include <assert.h>
  34   34  #include <errno.h>
  35   35  #include <fcntl.h>
  36   36  #include <libproc.h>
  37   37  #include <limits.h>
  38   38  #include <procfs.h>
  39   39  #include <stdio.h>
  40   40  #include <stdlib.h>
  41   41  #include <strings.h>
  42   42  #include <time.h>
  43   43  #include <unistd.h>
  44   44  #include "rcapd.h"
  45   45  #include "rcapd_rfd.h"
  46   46  #include "rcapd_mapping.h"
  47   47  #include "utils.h"
  48   48  
static int lpc_xmap_update(lprocess_t *);
#ifdef DEBUG
extern int lmapping_dump_diff(lmapping_t *lm1, lmapping_t *lm2);
#endif /* DEBUG */

/*
 * The number of file descriptors required to grab a process and create an
 * agent in it.
 */
#define	PGRAB_FD_COUNT		10

/*
 * Record a position in an address space as it corresponds to a prpageheader_t
 * and affiliated structures.
 */
typedef struct prpageheader_cur {
	int pr_nmap;		/* number of mappings in address space */
	int pr_map;		/* number of this mapping */
	uint64_t pr_pgoff;	/* page offset into mapping */
	uint64_t pr_npage;	/* number of pages in mapping */
	uint64_t pr_pagesize;	/* page size of mapping */
	uintptr_t pr_addr;	/* base of mapping */
	prpageheader_t *pr_prpageheader;	/* associated page header */
	void *pr_pdaddr;	/* address of page's byte in pagedata */
	prxmap_t *pr_xmap;	/* array containing per-segment information */
	int pr_nxmap;		/* number of xmaps in array */
	int64_t pr_rss;		/* number of resident pages in mapping, */
				/* or -1 if xmap is out of sync */
	int64_t pr_pg_rss;	/* number of pageable pages in mapping, or -1 */
} prpageheader_cur_t;

static struct ps_prochandle *scan_pr;	/* currently-scanned process's handle */

/*
 * Verbosity levels for st_debug() messages.
 */
typedef enum {
	STDL_NORMAL,
	STDL_HIGH
} st_debug_level_t;
  86   86  
  87   87  /*
  88   88   * Output a scanning-related debug message.
  89   89   */
  90   90  /*PRINTFLIKE3*/ /*ARGSUSED*/
  91   91  static void
  92   92  st_debug(st_debug_level_t level, lcollection_t *lcol, char *msg, ...)
  93   93  {
  94   94  #ifdef DEBUG_MSG
  95   95          va_list alist;
  96   96          char *buf;
  97   97          size_t len;
  98   98  
  99   99          if (get_message_priority() < ((level == STDL_HIGH) ? RCM_DEBUG_HIGH
 100  100              : RCM_DEBUG))
 101  101                  return;
 102  102  
 103  103          len = strlen(msg) + LINELEN;
 104  104          buf = malloc(len);
 105  105          if (buf == NULL)
 106  106                  return;
 107  107          (void) snprintf(buf, len, "%s %s scanner %s",
 108  108              (lcol->lcol_id.rcid_type == RCIDT_PROJECT ? "project" : "zone"),
 109  109              lcol->lcol_name, msg);
 110  110  
 111  111          va_start(alist, msg);
 112  112          vdprintfe(RCM_DEBUG, buf, alist);
 113  113          va_end(alist);
 114  114  
 115  115          free(buf);
 116  116  #endif /* DEBUG_MSG */
 117  117  }
 118  118  
 119  119  /*
 120  120   * Determine the collection's current victim, based on its last.  The last will
 121  121   * be returned, or, if invalid, any other valid process, if the collection has
 122  122   * any.
 123  123   */
 124  124  static lprocess_t *
 125  125  get_valid_victim(lcollection_t *lcol, lprocess_t *lpc)
 126  126  {
 127  127          if (lpc == NULL || !lcollection_member(lcol, lpc))
 128  128                  lpc = lcol->lcol_lprocess;
 129  129  
 130  130          /*
 131  131           * Find the next scannable process, and make it the victim.
 132  132           */
 133  133          while (lpc != NULL && lpc->lpc_unscannable != 0)
 134  134                  lpc = lpc->lpc_next;
 135  135  
 136  136          return (lpc);
 137  137  }
 138  138  
 139  139  /*
 140  140   * Get a process's combined current pagedata (per-page referenced and modified
 141  141   * bits) and set the supplied pointer to it.  The caller is responsible for
 142  142   * freeing the data.  If the pagedata is unreadable, a nonzero value is
 143  143   * returned, and errno is set.  Otherwise, 0 is returned.
 144  144   */
 145  145  static int
 146  146  get_pagedata(prpageheader_t **pghpp, int fd)
 147  147  {
 148  148          int res;
 149  149          struct stat st;
 150  150  
 151  151  redo:
 152  152          errno = 0;
 153  153          if (fstat(fd, &st) != 0) {
 154  154                  debug("cannot stat pagedata\n");
 155  155                  return (-1);
 156  156          }
 157  157  
 158  158          errno = 0;
 159  159          *pghpp = malloc(st.st_size);
 160  160          if (*pghpp == NULL) {
 161  161                  debug("cannot malloc() %ld bytes for pagedata", st.st_size);
 162  162                  return (-1);
 163  163          }
 164  164          (void) bzero(*pghpp, st.st_size);
 165  165  
 166  166          errno = 0;
 167  167          if ((res = read(fd, *pghpp, st.st_size)) != st.st_size) {
 168  168                  free(*pghpp);
 169  169                  *pghpp = NULL;
 170  170                  if (res > 0 || errno == E2BIG) {
 171  171                          debug("pagedata changed size, retrying\n");
 172  172                          goto redo;
 173  173                  } else {
 174  174                          debug("cannot read pagedata");
 175  175                          return (-1);
 176  176                  }
 177  177          }
 178  178  
 179  179          return (0);
 180  180  }
 181  181  
/*
 * Return the count of kilobytes of pages represented by the given pagedata
 * which meet the given criteria, having pages which are in all of the states
 * specified by the mask, and in none of the states in the notmask.  If the
 * CP_CLEAR flag is set, the pagedata will also be cleared.
 */
#define	CP_CLEAR	1
static uint64_t
count_pages(prpageheader_t *pghp, int flags, int mask, int notmask)
{
	int map;
	caddr_t cur, end;
	prpageheader_t pgh = *pghp;
	prasmap_t *asmapp;
	uint64_t count = 0;

	/* The first prasmap_t immediately follows the page header. */
	cur = (caddr_t)pghp + sizeof (*pghp);
	for (map = 0; map < pgh.pr_nmap; map++) {
		asmapp = (prasmap_t *)(uintptr_t)cur;
		cur += sizeof (*asmapp);
		/* One pagedata byte per page follows each prasmap_t. */
		end = cur + asmapp->pr_npage;
		while (cur < end) {
			/* Count only pages matching mask and not notmask. */
			if ((*cur & mask) == mask && (*cur & notmask) == 0)
				count += asmapp->pr_pagesize / 1024;
			if ((flags & CP_CLEAR) != 0)
				*cur = 0;
			cur++;
		}

		/*
		 * Skip to next 64-bit-aligned address to get the next
		 * prasmap_t.
		 */
		cur = (caddr_t)((intptr_t)(cur + 7) & ~7);
	}

	return (count);
}
 220  220  
 221  221  /*
 222  222   * Return the amount of memory (in kilobytes) that hasn't been referenced or
  223  223   * modified, which is the memory that will be paged out first.  Should be written to
 224  224   * exclude nonresident pages when sufficient interfaces exist.
 225  225   */
 226  226  static uint64_t
 227  227  unrm_size(lprocess_t *lpc)
 228  228  {
 229  229          return (count_pages(lpc->lpc_prpageheader, CP_CLEAR,
 230  230              0, PG_MODIFIED | PG_REFERENCED));
 231  231  }
 232  232  
 233  233  /*
 234  234   * Advance a prpageheader_cur_t to the address space's next mapping, returning
 235  235   * its address, or NULL if there is none.  Any known nonpageable or nonresident
 236  236   * mappings will be skipped over.
 237  237   */
 238  238  static uintptr_t
 239  239  advance_prpageheader_cur_nextmapping(prpageheader_cur_t *pcp)
 240  240  {
 241  241          prasmap_t *pap;
 242  242          int i;
 243  243  
 244  244  next:
 245  245          ASSERT(pcp->pr_map < pcp->pr_nmap);
 246  246          if ((pcp->pr_map + 1) == pcp->pr_nmap)
 247  247                  return (NULL);
 248  248          pcp->pr_map++;
 249  249          if (pcp->pr_pgoff < pcp->pr_npage) {
 250  250                  pcp->pr_pdaddr = (caddr_t)(uintptr_t)
 251  251                      ((uintptr_t)pcp->pr_pdaddr +
 252  252                      (pcp->pr_npage - pcp->pr_pgoff));
 253  253                  pcp->pr_pgoff = pcp->pr_npage;
 254  254          }
 255  255          /*
 256  256           * Skip to next 64-bit-aligned address to get the next prasmap_t.
 257  257           */
 258  258          pcp->pr_pdaddr = (caddr_t)(((uintptr_t)pcp->pr_pdaddr + 7) & ~7);
 259  259          pap = (prasmap_t *)pcp->pr_pdaddr;
 260  260          pcp->pr_pgoff = 0;
 261  261          pcp->pr_npage = pap->pr_npage;
 262  262          pcp->pr_pagesize = pap->pr_pagesize;
 263  263          pcp->pr_addr = pap->pr_vaddr;
 264  264          pcp->pr_pdaddr = pap + 1;
 265  265  
 266  266          /*
 267  267           * Skip any known nonpageable mappings.  Currently, the only one
 268  268           * detected is the schedctl page.
 269  269           */
 270  270          if ((pap->pr_mflags ^ (MA_SHARED | MA_READ | MA_WRITE | MA_EXEC |
 271  271              MA_ANON)) == 0 && pap->pr_npage == 1) {
 272  272                  debug("identified nonpageable schedctl mapping at %p\n",
 273  273                      (void *)pcp->pr_addr);
 274  274                  goto next;
 275  275          }
 276  276  
 277  277          /*
 278  278           * Skip mappings with no resident pages.  If the xmap does not
 279  279           * correspond to the pagedata for any reason, it will be ignored.
 280  280           */
 281  281          pcp->pr_rss = -1;
 282  282          pcp->pr_pg_rss = -1;
 283  283          for (i = 0; i < pcp->pr_nxmap; i++) {
 284  284                  prxmap_t *xmap = &pcp->pr_xmap[i];
 285  285  
 286  286                  if (pcp->pr_addr == xmap->pr_vaddr && xmap->pr_size ==
 287  287                      (pcp->pr_npage * pcp->pr_pagesize)) {
 288  288                          pcp->pr_rss = xmap->pr_rss;
 289  289                          /*
 290  290                           * Remove COW pages from the pageable RSS count.
 291  291                           */
 292  292                          if ((xmap->pr_mflags & MA_SHARED) == 0)
 293  293                                  pcp->pr_pg_rss = xmap->pr_anon;
 294  294                          break;
 295  295                  }
 296  296          }
 297  297          if (pcp->pr_rss == 0) {
 298  298                  debug("identified nonresident mapping at 0x%p\n",
 299  299                      (void *)pcp->pr_addr);
 300  300                  goto next;
 301  301          } else if (pcp->pr_pg_rss == 0) {
 302  302                  debug("identified unpageable mapping at 0x%p\n",
 303  303                      (void *)pcp->pr_addr);
 304  304                  goto next;
 305  305          }
 306  306  
 307  307          return (pcp->pr_addr);
 308  308  }
 309  309  
 310  310  /*
 311  311   * Advance a prpageheader_cur_t to the mapping's next page, returning its
 312  312   * address, or NULL if there is none.
 313  313   */
 314  314  static void *
 315  315  advance_prpageheader_cur(prpageheader_cur_t *pcp)
 316  316  {
 317  317          ASSERT(pcp->pr_pgoff < pcp->pr_npage);
 318  318          if ((pcp->pr_pgoff + 1) == pcp->pr_npage)
 319  319                  return (NULL);
 320  320          pcp->pr_pdaddr = (caddr_t)pcp->pr_pdaddr + 1;
 321  321          pcp->pr_pgoff++;
 322  322  
 323  323          ASSERT((*(char *)pcp->pr_pdaddr & ~(PG_MODIFIED | PG_REFERENCED)) == 0);
 324  324          return ((caddr_t)pcp->pr_addr + pcp->pr_pgoff * pcp->pr_pagesize);
 325  325  }
 326  326  
 327  327  /*
 328  328   * Initialize a prpageheader_cur_t, positioned at the first page of the mapping
 329  329   * of an address space.
 330  330   */
 331  331  static void *
 332  332  set_prpageheader_cur(prpageheader_cur_t *pcp, prpageheader_t *php,
 333  333      prxmap_t *xmap, int nxmap)
 334  334  {
 335  335          bzero(pcp, sizeof (*pcp));
 336  336          pcp->pr_nmap = php->pr_nmap;
 337  337          pcp->pr_map = -1;
 338  338          pcp->pr_prpageheader = php;
 339  339          pcp->pr_xmap = xmap;
 340  340          pcp->pr_nxmap = nxmap;
 341  341          pcp->pr_pdaddr = (prpageheader_t *)php + 1;
 342  342  
 343  343          return ((void *)advance_prpageheader_cur_nextmapping(pcp));
 344  344  }
 345  345  
/*
 * Position a prpageheader_cur_t to the mapped address greater or equal to the
 * given value.
 */
static void *
set_prpageheader_cur_addr(prpageheader_cur_t *pcp, prpageheader_t *php,
    prxmap_t *xmap, int nxmap, void *naddr)
{
	void *addr = set_prpageheader_cur(pcp, php, xmap, nxmap);

	/*
	 * Walk mapping by mapping until naddr falls within the current one,
	 * then jump the cursor directly to the page containing naddr.
	 */
	while (addr != NULL && addr <= naddr)
		if (naddr < (void *)((caddr_t)pcp->pr_addr +
		    pcp->pr_pagesize * pcp->pr_npage)) {
			/* naddr is inside this mapping; index to its page. */
			uint64_t pgdiff = ((uintptr_t)naddr -
			    (uintptr_t)pcp->pr_addr) / pcp->pr_pagesize;
			pcp->pr_pgoff += pgdiff;
			pcp->pr_pdaddr = (caddr_t)pcp->pr_pdaddr + pgdiff;
			addr = (caddr_t)pcp->pr_addr + pcp->pr_pagesize *
			    pcp->pr_pgoff;
			break;
		} else
			addr =
			    (void *)advance_prpageheader_cur_nextmapping(pcp);

	return (addr);
}
 372  372  
 373  373  static void
 374  374  revoke_pagedata(rfd_t *rfd)
 375  375  {
 376  376          lprocess_t *lpc = rfd->rfd_data;
 377  377  
 378  378          st_debug(STDL_NORMAL, lpc->lpc_collection, "revoking pagedata for"
 379  379              " process %d\n", (int)lpc->lpc_pid);
 380  380          ASSERT(lpc->lpc_pgdata_fd != -1);
 381  381          lpc->lpc_pgdata_fd = -1;
 382  382  }
 383  383  
 384  384  #ifdef DEBUG
 385  385  static void
 386  386  mklmapping(lmapping_t **lm, prpageheader_t *pgh)
 387  387  {
 388  388          prpageheader_cur_t cur;
 389  389          void *addr;
 390  390  
 391  391          addr = set_prpageheader_cur(&cur, pgh, NULL, -1);
 392  392          ASSERT(*lm == NULL);
 393  393          while (addr != NULL) {
 394  394                  (void) lmapping_insert(lm, cur.pr_addr, cur.pr_npage *
 395  395                      cur.pr_pagesize);
 396  396                  addr = (void *)advance_prpageheader_cur_nextmapping(&cur);
 397  397          }
 398  398  }
 399  399  
 400  400  static void
 401  401  lmapping_dump(lmapping_t *lm)
 402  402  {
 403  403          debug("lm: %p\n", (void *)lm);
 404  404          while (lm != NULL) {
 405  405                  debug("\t(%p, %llx\n", (void *)lm->lm_addr,
 406  406                      (unsigned long long)lm->lm_size);
 407  407                  lm = lm->lm_next;
 408  408          }
 409  409  }
 410  410  #endif /* DEBUG */
 411  411  
/*
 * OR two prpagedata_t which are supposedly snapshots of the same address
 * space.  Intersecting mappings with different page sizes are tolerated but
 * not normalized (not accurate).  If the mappings of the two snapshots differ
 * in any regard, the supplied mappings_changed flag will be set.
 */
static void
OR_pagedata(prpageheader_t *src, prpageheader_t *dst, int *mappings_changedp)
{
	prpageheader_cur_t src_cur;
	prpageheader_cur_t dst_cur;
	uintptr_t src_addr;
	uintptr_t dst_addr;
	int mappings_changed = 0;

	/*
	 * OR source pagedata with the destination, for pages of intersecting
	 * mappings.
	 */
	src_addr = (uintptr_t)set_prpageheader_cur(&src_cur, src, NULL, -1);
	dst_addr = (uintptr_t)set_prpageheader_cur(&dst_cur, dst, NULL, -1);
	while (src_addr != NULL && dst_addr != NULL) {
		/* Merge R/M bits page by page while the cursors coincide. */
		while (src_addr == dst_addr && src_addr != NULL) {
			*(char *)dst_cur.pr_pdaddr |=
			    *(char *)src_cur.pr_pdaddr;
			src_addr = (uintptr_t)advance_prpageheader_cur(
			    &src_cur);
			dst_addr = (uintptr_t)advance_prpageheader_cur(
			    &dst_cur);
		}
		if (src_addr != dst_addr)
			mappings_changed = 1;
		src_addr = advance_prpageheader_cur_nextmapping(&src_cur);
		dst_addr = advance_prpageheader_cur_nextmapping(&dst_cur);
		/*
		 * The cursors have diverged: advance whichever lags until
		 * they realign, noting that the mapping sets differ.
		 */
		while (src_addr != dst_addr && src_addr != NULL && dst_addr !=
		    NULL) {
			mappings_changed = 1;
			if (src_addr < dst_addr)
				src_addr = advance_prpageheader_cur_nextmapping(
				    &src_cur);
			else
				dst_addr = advance_prpageheader_cur_nextmapping(
				    &dst_cur);
		}
	}

	*mappings_changedp = mappings_changed;
}
 460  460  
/*
 * Merge the current pagedata with that on hand.  If the pagedata is
 * unretrievable for any reason, such as the process having exited or being a
 * zombie, a nonzero value is returned, the process should be marked
 * unscannable, and future attempts to scan it should be avoided, since the
 * symptom is probably permanent.  If the mappings of either pagedata
 * differ in any respect, the supplied callback will be invoked once.
 */
static int
merge_current_pagedata(lprocess_t *lpc,
    void(*mappings_changed_cb) (lprocess_t *))
{
	prpageheader_t *pghp;
	int mappings_changed = 0;
	uint64_t cnt;

	/* (Re)open the pagedata file if there is no usable descriptor. */
	if (lpc->lpc_pgdata_fd < 0 || get_pagedata(&pghp, lpc->lpc_pgdata_fd) !=
	    0) {
		char pathbuf[PROC_PATH_MAX];

		(void) snprintf(pathbuf, sizeof (pathbuf), "/proc/%d/pagedata",
		    (int)lpc->lpc_pid);
		if ((lpc->lpc_pgdata_fd = rfd_open(pathbuf, 1, RFD_PAGEDATA,
		    revoke_pagedata, lpc, O_RDONLY, 0)) < 0 ||
		    get_pagedata(&pghp, lpc->lpc_pgdata_fd) != 0)
			return (-1);
		debug("starting/resuming pagedata collection for %d\n",
		    (int)lpc->lpc_pid);
	}

	cnt = count_pages(pghp, 0, PG_MODIFIED | PG_REFERENCED, 0);
	if (cnt != 0 || lpc->lpc_rss != 0)
		debug("process %d: %llu/%llukB rfd/mdfd since last read\n",
		    (int)lpc->lpc_pid, (unsigned long long)cnt,
		    (unsigned long long)lpc->lpc_rss);
	if (lpc->lpc_prpageheader != NULL) {
		/*
		 * OR the two snapshots.
		 */
#ifdef DEBUG
		lmapping_t *old = NULL;
		lmapping_t *new = NULL;

		mklmapping(&new, pghp);
		mklmapping(&old, lpc->lpc_prpageheader);
#endif /* DEBUG */
		OR_pagedata(lpc->lpc_prpageheader, pghp, &mappings_changed);
#ifdef DEBUG
		/*
		 * Cross-check OR_pagedata's changed flag against an
		 * independent comparison of the two mapping lists.
		 */
		if (((mappings_changed != 0) ^
		    (lmapping_dump_diff(old, new) != 0))) {
			debug("lmapping_changed inconsistent with lmapping\n");
			debug("old\n");
			lmapping_dump(old);
			debug("new\n");
			lmapping_dump(new);
			debug("ignored\n");
			lmapping_dump(lpc->lpc_ignore);
			ASSERT(0);
		}
		lmapping_free(&new);
		lmapping_free(&old);
#endif /* DEBUG */
		free(lpc->lpc_prpageheader);
	} else
		mappings_changed = 1;
	lpc->lpc_prpageheader = pghp;

	cnt = count_pages(pghp, 0, PG_MODIFIED | PG_REFERENCED, 0);
	if (cnt != 0 || lpc->lpc_rss != 0)
		debug("process %d: %llu/%llukB rfd/mdfd since hand swept\n",
		    (int)lpc->lpc_pid, (unsigned long long)cnt,
		    (unsigned long long)lpc->lpc_rss);
	if (mappings_changed != 0) {
		debug("process %d: mappings changed\n", (int)lpc->lpc_pid);
		if (mappings_changed_cb != NULL)
			mappings_changed_cb(lpc);
	}
	return (0);
}
 540  540  
 541  541  /*
 542  542   * Attempt to page out a region of the given process's address space.  May
 543  543   * return nonzero if not all of the pages may are pageable, for any reason.
 544  544   */
 545  545  static int
 546  546  pageout(pid_t pid, struct ps_prochandle *Pr, caddr_t start, caddr_t end)
 547  547  {
 548  548          int res;
 549  549  
 550  550          if (end <= start)
 551  551                  return (0);
 552  552  
 553  553          errno = 0;
 554  554          res = pr_memcntl(Pr, start, (end - start), MC_SYNC,
 555  555              (caddr_t)(MS_ASYNC | MS_INVALCURPROC), 0, 0);
 556  556          debug_high("pr_memcntl [%p-%p): %d", (void *)start, (void *)end, res);
 557  557  
 558  558          /*
 559  559           * EBUSY indicates none of the pages have backing store allocated, or
 560  560           * some pages were locked, which are less interesting than other
 561  561           * conditions, which are noted.
 562  562           */
 563  563          if (res != 0)
 564  564                  if (errno == EBUSY)
 565  565                          res = 0;
 566  566                  else
 567  567                          debug("%d: can't pageout %p+%llx (errno %d)", (int)pid,
 568  568                              (void *)start, (long long)(end - start), errno);
 569  569  
 570  570          return (res);
 571  571  }
 572  572  
 573  573  /*
 574  574   * Compute the delta of the victim process's RSS since the last call.  If the
 575  575   * psinfo cannot be obtained, no work is done, and no error is returned; it is
 576  576   * up to the caller to detect the process' termination via other means.
 577  577   */
 578  578  static int64_t
 579  579  rss_delta(psinfo_t *new_psinfo, psinfo_t *old_psinfo, lprocess_t *vic)
 580  580  {
 581  581          int64_t d_rss = 0;
 582  582  
 583  583          if (get_psinfo(vic->lpc_pid, new_psinfo, vic->lpc_psinfo_fd,
 584  584              lprocess_update_psinfo_fd_cb, vic, vic) == 0) {
 585  585                  d_rss = (int64_t)new_psinfo->pr_rssize -
 586  586                      (int64_t)old_psinfo->pr_rssize;
 587  587                  if (d_rss < 0)
 588  588                          vic->lpc_collection->lcol_stat.lcols_pg_eff +=
 589  589                              (- d_rss);
 590  590                  *old_psinfo = *new_psinfo;
 591  591          }
 592  592  
 593  593          return (d_rss);
 594  594  }
 595  595  
/*
 * Discard a process's entire set of ignored mappings, so that all of its
 * mappings will be reconsidered for scanning.
 */
static void
unignore_mappings(lprocess_t *lpc)
{
	lmapping_free(&lpc->lpc_ignore);
}
 601  601  
/*
 * Remove from a process's ignored set any mapping with at least one
 * referenced or modified page, since such usage suggests the condition that
 * made the mapping unpageable may no longer hold.
 */
static void
unignore_referenced_mappings(lprocess_t *lpc)
{
	prpageheader_cur_t cur;
	void *vicaddr;

	vicaddr = set_prpageheader_cur(&cur, lpc->lpc_prpageheader, NULL, -1);
	while (vicaddr != NULL) {
		if (((*(char *)cur.pr_pdaddr) & (PG_REFERENCED | PG_MODIFIED))
		    != 0) {
			if (lmapping_remove(&lpc->lpc_ignore, cur.pr_addr,
			    cur.pr_npage * cur.pr_pagesize) == 0)
				debug("removed mapping 0x%p+0t%llukB from"
				    " ignored set\n", (void *)cur.pr_addr,
				    (unsigned long long)(cur.pr_npage *
				    cur.pr_pagesize / 1024));
			/* This mapping is settled; skip to the next one. */
			vicaddr = (void *)advance_prpageheader_cur_nextmapping(
			    &cur);
		} else if ((vicaddr = advance_prpageheader_cur(&cur)) == NULL)
			/* End of this mapping's pages; move to the next. */
			vicaddr = (void *)advance_prpageheader_cur_nextmapping(
			    &cur);
	}
}
 625  625  
 626  626  /*
 627  627   * Resume scanning, starting with the last victim, if it is still valid, or any
 628  628   * other one, otherwise.
 629  629   */
 630  630  void
 631  631  scan(lcollection_t *lcol, int64_t excess)
 632  632  {
 633  633          lprocess_t *vic, *lpc;
 634  634          void *vicaddr, *endaddr, *nvicaddr;
 635  635          prpageheader_cur_t cur;
 636  636          psinfo_t old_psinfo, new_psinfo;
 637  637          hrtime_t scan_start;
 638  638          int res, resumed;
 639  639          uint64_t col_unrm_size;
 640  640  
 641  641          st_debug(STDL_NORMAL, lcol, "starting to scan, excess %lldk\n",
 642  642              (long long)excess);
 643  643  
 644  644          /*
 645  645           * Determine the address to start scanning at, depending on whether
 646  646           * scanning can be resumed.
 647  647           */
 648  648          endaddr = NULL;
 649  649          if ((vic = get_valid_victim(lcol, lcol->lcol_victim)) ==
 650  650              lcol->lcol_victim && lcol->lcol_resaddr != NULL) {
 651  651                  vicaddr = lcol->lcol_resaddr;
 652  652                  st_debug(STDL_NORMAL, lcol, "resuming process %d\n",
 653  653                      (int)vic->lpc_pid);
 654  654                  resumed = 1;
 655  655          } else {
 656  656                  vicaddr = NULL;
 657  657                  resumed = 0;
 658  658          }
 659  659  
 660  660          scan_start = gethrtime();
 661  661          /*
 662  662           * Obtain the most current pagedata for the processes that might be
 663  663           * scanned, and remove from the ignored set any mappings which have
 664  664           * referenced or modified pages (in the hopes that the pageability of
 665  665           * the mapping's pages may have changed).  Determine if the
 666  666           * unreferenced and unmodified portion is impossibly small to suffice
 667  667           * to reduce the excess completely.  If so, ignore these bits so that
 668  668           * even working set will be paged out.
 669  669           */
 670  670          col_unrm_size = 0;
 671  671          lpc = vic;
 672  672          while (lpc != NULL && should_run) {
 673  673                  if (merge_current_pagedata(lpc, unignore_mappings) != 0) {
 674  674                          st_debug(STDL_NORMAL, lcol, "process %d:"
 675  675                              " exited/temporarily unscannable",
 676  676                              (int)lpc->lpc_pid);
 677  677                          goto next;
 678  678                  }
 679  679                  debug("process %d: %llu/%llukB scannable\n", (int)lpc->lpc_pid,
 680  680                      (unsigned long long)(lpc->lpc_unrm = unrm_size(lpc)),
 681  681                      (unsigned long long)lpc->lpc_size);
 682  682                  col_unrm_size += lpc->lpc_unrm = unrm_size(lpc);
 683  683  
 684  684                  if ((lcol->lcol_stat.lcols_scan_count %
 685  685                      RCAPD_IGNORED_SET_FLUSH_IVAL) == 0) {
 686  686                          /*
 687  687                           * Periodically clear the set of ignored mappings.
 688  688                           * This will allow processes whose ignored segments'
 689  689                           * pageability have changed (without a corresponding
 690  690                           * reference or modification to a page) to be
 691  691                           * recognized.
 692  692                           */
 693  693                          if (lcol->lcol_stat.lcols_scan_count > 0)
 694  694                                  unignore_mappings(lpc);
 695  695                  } else {
 696  696                          /*
 697  697                           * Ensure mappings with referenced or modified pages
 698  698                           * are not in the ignored set.  Their usage might mean
 699  699                           * the condition which made them unpageable is gone.
 700  700                           */
 701  701                          unignore_referenced_mappings(lpc);
 702  702                  }
 703  703  next:
 704  704                  lpc = lpc->lpc_next != NULL ? get_valid_victim(lcol,
 705  705                      lpc->lpc_next) : NULL;
 706  706          }
 707  707          if (col_unrm_size < excess) {
 708  708                  lpc = vic;
 709  709                  debug("will not reduce excess with only unreferenced pages\n");
 710  710                  while (lpc != NULL && should_run) {
 711  711                          if (lpc->lpc_prpageheader != NULL) {
 712  712                                  (void) count_pages(lpc->lpc_prpageheader,
 713  713                                      CP_CLEAR, 0, 0);
 714  714                                  if (lpc->lpc_pgdata_fd >= 0) {
 715  715                                          if (rfd_close(lpc->lpc_pgdata_fd) != 0)
 716  716                                                  debug("coud not close %d"
 717  717                                                      " lpc_pgdata_fd %d",
 718  718                                                      (int)lpc->lpc_pid,
 719  719                                                      lpc->lpc_pgdata_fd);
 720  720                                          lpc->lpc_pgdata_fd = -1;
 721  721                                  }
 722  722                          }
 723  723                          lpc = lpc->lpc_next != NULL ? get_valid_victim(lcol,
 724  724                              lpc->lpc_next) : NULL;
 725  725                  }
 726  726          }
 727  727  
 728  728          /*
 729  729           * Examine each process for pages to remove until the excess is
 730  730           * reduced.
 731  731           */
 732  732          while (vic != NULL && excess > 0 && should_run) {
 733  733                  /*
 734  734                   * Skip processes whose death was reported when the merging of
 735  735                   * pagedata was attempted.
 736  736                   */
 737  737                  if (vic->lpc_prpageheader == NULL)
 738  738                          goto nextproc;
 739  739  
 740  740                  /*
 741  741                   * Obtain optional segment residency information.
 742  742                   */
 743  743                  if (lpc_xmap_update(vic) != 0)
 744  744                          st_debug(STDL_NORMAL, lcol, "process %d: xmap"
 745  745                              " unreadable; ignoring", (int)vic->lpc_pid);
 746  746  
 747  747  #ifdef DEBUG_MSG
 748  748                  {
 749  749                          void *ovicaddr = vicaddr;
 750  750  #endif /* DEBUG_MSG */
 751  751                  vicaddr = set_prpageheader_cur_addr(&cur, vic->lpc_prpageheader,
 752  752                      vic->lpc_xmap, vic->lpc_nxmap, vicaddr);
 753  753  #ifdef DEBUG_MSG
 754  754                          st_debug(STDL_NORMAL, lcol, "trying to resume from"
 755  755                              " 0x%p, next 0x%p\n", ovicaddr, vicaddr);
 756  756                  }
 757  757  #endif /* DEBUG_MSG */
 758  758  
 759  759                  /*
 760  760                   * Take control of the victim.
 761  761                   */
 762  762                  if (get_psinfo(vic->lpc_pid, &old_psinfo,
 763  763                      vic->lpc_psinfo_fd, lprocess_update_psinfo_fd_cb,
 764  764                      vic, vic) != 0) {
 765  765                          st_debug(STDL_NORMAL, lcol, "cannot get %d psinfo",
 766  766                              (int)vic->lpc_pid);
 767  767                          goto nextproc;
 768  768                  }
 769  769                  (void) rfd_reserve(PGRAB_FD_COUNT);
 770  770                  if ((scan_pr = Pgrab(vic->lpc_pid, 0, &res)) == NULL) {
 771  771                          st_debug(STDL_NORMAL, lcol, "cannot grab %d (%d)",
 772  772                              (int)vic->lpc_pid, res);
 773  773                          goto nextproc;
 774  774                  }
 775  775                  if (Pcreate_agent(scan_pr) != 0) {
 776  776                          st_debug(STDL_NORMAL, lcol, "cannot control %d",
 777  777                              (int)vic->lpc_pid);
 778  778                          goto nextproc;
 779  779                  }
 780  780                  /*
 781  781                   * Be very pessimistic about the state of the agent LWP --
 782  782                   * verify it's actually stopped.
 783  783                   */
 784  784                  errno = 0;
 785  785                  while (Pstate(scan_pr) == PS_RUN)
 786  786                          (void) Pwait(scan_pr, 0);
 787  787                  if (Pstate(scan_pr) != PS_STOP) {
 788  788                          st_debug(STDL_NORMAL, lcol, "agent not in expected"
 789  789                              " state (%d)", Pstate(scan_pr));
 790  790                          goto nextproc;
 791  791                  }
 792  792  
 793  793                  /*
 794  794                   * Within the victim's address space, find contiguous ranges of
 795  795                   * unreferenced pages to page out.
 796  796                   */
 797  797                  st_debug(STDL_NORMAL, lcol, "paging out process %d\n",
 798  798                      (int)vic->lpc_pid);
 799  799                  while (excess > 0 && vicaddr != NULL && should_run) {
 800  800                          /*
 801  801                           * Skip mappings in the ignored set.  Mappings get
 802  802                           * placed in the ignored set when all their resident
 803  803                           * pages are unreferenced and unmodified, yet unpageable
 804  804                           * -- such as when they are locked, or involved in
 805  805                           * asynchronous I/O.  They will be scanned again when
 806  806                           * some page is referenced or modified.
 807  807                           */
 808  808                          if (lmapping_contains(vic->lpc_ignore, cur.pr_addr,
 809  809                              cur.pr_npage * cur.pr_pagesize)) {
 810  810                                  debug("ignored mapping at 0x%p\n",
 811  811                                      (void *)cur.pr_addr);
 812  812                                  /*
 813  813                                   * Update statistics.
 814  814                                   */
 815  815                                  lcol->lcol_stat.lcols_pg_att +=
 816  816                                      cur.pr_npage * cur.pr_pagesize / 1024;
 817  817  
 818  818                                  vicaddr = (void *)
 819  819                                      advance_prpageheader_cur_nextmapping(&cur);
 820  820                                  continue;
 821  821                          }
 822  822  
 823  823                          /*
 824  824                           * Determine a range of unreferenced pages to page out,
 825  825                           * and clear the R/M bits in the preceding referenced
 826  826                           * range.
 827  827                           */
 828  828                          st_debug(STDL_HIGH, lcol, "start from mapping at 0x%p,"
 829  829                              " npage %llu\n", vicaddr,
 830  830                              (unsigned long long)cur.pr_npage);
 831  831                          while (vicaddr != NULL &&
 832  832                              *(caddr_t)cur.pr_pdaddr != 0) {
 833  833                                  *(caddr_t)cur.pr_pdaddr = 0;
 834  834                                  vicaddr = advance_prpageheader_cur(&cur);
 835  835                          }
 836  836                          st_debug(STDL_HIGH, lcol, "advance, vicaddr %p, pdaddr"
 837  837                              " %p\n", vicaddr, cur.pr_pdaddr);
 838  838                          if (vicaddr == NULL) {
 839  839                                  /*
 840  840                                   * The end of mapping was reached before any
 841  841                                   * unreferenced pages were seen.
 842  842                                   */
 843  843                                  vicaddr = (void *)
 844  844                                      advance_prpageheader_cur_nextmapping(&cur);
 845  845                                  continue;
 846  846                          }
 847  847                          do
 848  848                                  endaddr = advance_prpageheader_cur(&cur);
 849  849                          while (endaddr != NULL &&
 850  850                              *(caddr_t)cur.pr_pdaddr == 0 &&
 851  851                              (((intptr_t)endaddr - (intptr_t)vicaddr) /
 852  852                                  1024) < excess);
 853  853                          st_debug(STDL_HIGH, lcol, "endaddr %p, *cur %d\n",
 854  854                              endaddr, *(caddr_t)cur.pr_pdaddr);
 855  855  
 856  856                          /*
 857  857                           * Page out from vicaddr to the end of the mapping, or
 858  858                           * endaddr if set, then continue scanning after
 859  859                           * endaddr, or the next mapping, if not set.
 860  860                           */
 861  861                          nvicaddr = endaddr;
 862  862                          if (endaddr == NULL)
 863  863                                  endaddr = (caddr_t)cur.pr_addr +
 864  864                                      cur.pr_pagesize * cur.pr_npage;
 865  865                          if (pageout(vic->lpc_pid, scan_pr, vicaddr, endaddr) ==
 866  866                              0) {
 867  867                                  int64_t d_rss, att;
 868  868                                  int willignore = 0;
 869  869  
 870  870                                  excess += (d_rss = rss_delta(
 871  871                                      &new_psinfo, &old_psinfo, vic));
 872  872  
 873  873                                  /*
 874  874                                   * If this pageout attempt was unsuccessful
 875  875                                   * (the resident portion was not affected), and
 876  876                                   * was for the whole mapping, put it in the
 877  877                                   * ignored set, so it will not be scanned again
 878  878                                   * until some page is referenced or modified.
 879  879                                   */
 880  880                                  if (d_rss >= 0 && (void *)cur.pr_addr ==
 881  881                                      vicaddr && (cur.pr_pagesize * cur.pr_npage)
 882  882                                      == ((uintptr_t)endaddr -
 883  883                                      (uintptr_t)vicaddr)) {
 884  884                                          if (lmapping_insert(
 885  885                                              &vic->lpc_ignore,
 886  886                                              cur.pr_addr,
 887  887                                              cur.pr_pagesize *
 888  888                                              cur.pr_npage) != 0)
 889  889                                                  debug("not enough memory to add"
 890  890                                                      " mapping at %p to ignored"
 891  891                                                      " set\n",
 892  892                                                      (void *)cur.pr_addr);
 893  893                                          willignore = 1;
 894  894                                  }
 895  895  
 896  896                                  /*
 897  897                                   * Update statistics.
 898  898                                   */
 899  899                                  lcol->lcol_stat.lcols_pg_att += (att =
 900  900                                      ((intptr_t)endaddr - (intptr_t)vicaddr) /
 901  901                                      1024);
 902  902                                  st_debug(STDL_NORMAL, lcol, "paged out 0x%p"
 903  903                                      "+0t(%llu/%llu)kB%s\n", vicaddr,
 904  904                                      (unsigned long long)((d_rss <
 905  905                                      0) ? - d_rss : 0), (unsigned long long)att,
 906  906                                      willignore ? " (will ignore)" : "");
 907  907                          } else {
 908  908                                  st_debug(STDL_NORMAL, lcol,
 909  909                                      "process %d: exited/unscannable\n",
 910  910                                      (int)vic->lpc_pid);
 911  911                                  vic->lpc_unscannable = 1;
 912  912                                  goto nextproc;
 913  913                          }
 914  914  
 915  915                          /*
 916  916                           * Update the statistics file, if it's time.
 917  917                           */
 918  918                          check_update_statistics();
 919  919  
 920  920                          vicaddr = (nvicaddr != NULL) ? nvicaddr : (void
 921  921                              *)advance_prpageheader_cur_nextmapping(&cur);
 922  922                  }
 923  923                  excess += rss_delta(&new_psinfo, &old_psinfo, vic);
 924  924                  st_debug(STDL_NORMAL, lcol, "done, excess %lld\n",
 925  925                      (long long)excess);
 926  926  nextproc:
 927  927                  /*
 928  928                   * If a process was grabbed, release it, destroying its agent.
 929  929                   */
 930  930                  if (scan_pr != NULL) {
 931  931                          (void) Prelease(scan_pr, 0);
 932  932                          scan_pr = NULL;
 933  933                  }
 934  934                  lcol->lcol_victim = vic;
 935  935                  /*
 936  936                   * Scan the collection at most once.  Only if scanning was not
 937  937                   * aborted for any reason, and the end of lprocess has not been
 938  938                   * reached, determine the next victim and scan it.
 939  939                   */
 940  940                  if (vic != NULL) {
 941  941                          if (vic->lpc_next != NULL) {
 942  942                                  /*
 943  943                                   * Determine the next process to be scanned.
 944  944                                   */
 945  945                                  if (excess > 0) {
 946  946                                          vic = get_valid_victim(lcol,
 947  947                                              vic->lpc_next);
 948  948                                          vicaddr = 0;
 949  949                                  }
 950  950                          } else {
 951  951                                  /*
 952  952                                   * A complete scan of the collection was made,
 953  953                                   * so tick the scan counter and stop scanning
 954  954                                   * until the next request.
 955  955                                   */
 956  956                                  lcol->lcol_stat.lcols_scan_count++;
 957  957                                  lcol->lcol_stat.lcols_scan_time_complete
 958  958                                      = lcol->lcol_stat.lcols_scan_time;
 959  959                                  /*
 960  960                                   * If an excess still exists, tick the
 961  961                                   * "ineffective scan" counter, signalling that
 962  962                                   * the cap may be unenforceable.
 963  963                                   */
 964  964                                  if (resumed == 0 && excess > 0)
 965  965                                          lcol->lcol_stat
 966  966                                              .lcols_scan_ineffective++;
 967  967                                  /*
 968  968                                   * Scanning should start at the beginning of
 969  969                                   * the process list at the next request.
 970  970                                   */
 971  971                                  if (excess > 0)
 972  972                                          vic = NULL;
 973  973                          }
 974  974                  }
 975  975          }
 976  976          lcol->lcol_stat.lcols_scan_time += (gethrtime() - scan_start);
 977  977          st_debug(STDL_HIGH, lcol, "done scanning; excess %lld\n",
 978  978              (long long)excess);
 979  979  
 980  980          lcol->lcol_resaddr = vicaddr;
 981  981          if (lcol->lcol_resaddr == NULL && lcol->lcol_victim != NULL) {
 982  982                  lcol->lcol_victim = get_valid_victim(lcol,
 983  983                      lcol->lcol_victim->lpc_next);
 984  984          }
 985  985  }
 986  986  
 987  987  /*
 988  988   * Abort the scan in progress, and destroy the agent LWP of any grabbed
 989  989   * processes.
 990  990   */
 991  991  void
 992  992  scan_abort(void)
 993  993  {
 994  994          if (scan_pr != NULL)
 995  995                  (void) Prelease(scan_pr, NULL);
 996  996  }
 997  997  
 998  998  static void
 999  999  revoke_xmap(rfd_t *rfd)
1000 1000  {
1001 1001          lprocess_t *lpc = rfd->rfd_data;
1002 1002  
1003 1003          debug("revoking xmap for process %d\n", (int)lpc->lpc_pid);
1004 1004          ASSERT(lpc->lpc_xmap_fd != -1);
1005 1005          lpc->lpc_xmap_fd = -1;
1006 1006  }
1007 1007  
1008 1008  /*
1009 1009   * Retrieve the process's current xmap , which is used to determine the size of
1010 1010   * the resident portion of its segments.  Return zero if successful.
1011 1011   */
1012 1012  static int
1013 1013  lpc_xmap_update(lprocess_t *lpc)
1014 1014  {
1015 1015          int res;
1016 1016          struct stat st;
1017 1017  
1018 1018          free(lpc->lpc_xmap);
1019 1019          lpc->lpc_xmap = NULL;
1020 1020          lpc->lpc_nxmap = -1;
1021 1021  
1022 1022          if (lpc->lpc_xmap_fd == -1) {
1023 1023                  char pathbuf[PROC_PATH_MAX];
1024 1024  
1025 1025                  (void) snprintf(pathbuf, sizeof (pathbuf), "/proc/%d/xmap",
1026 1026                      (int)lpc->lpc_pid);
1027 1027                  if ((lpc->lpc_xmap_fd = rfd_open(pathbuf, 1, RFD_XMAP,
1028 1028                      revoke_xmap, lpc, O_RDONLY, 0)) < 0)
1029 1029                          return (-1);
1030 1030          }
1031 1031  
1032 1032  redo:
1033 1033          errno = 0;
1034 1034          if (fstat(lpc->lpc_xmap_fd, &st) != 0) {
1035 1035                  debug("cannot stat xmap\n");
1036 1036                  (void) rfd_close(lpc->lpc_xmap_fd);
1037 1037                  lpc->lpc_xmap_fd = -1;
1038 1038                  return (-1);
1039 1039          }
1040 1040  
1041 1041          if ((st.st_size % sizeof (*lpc->lpc_xmap)) != 0) {
1042 1042                  debug("xmap wrong size\n");
1043 1043                  (void) rfd_close(lpc->lpc_xmap_fd);
1044 1044                  lpc->lpc_xmap_fd = -1;
1045 1045                  return (-1);
1046 1046          }
1047 1047  
1048 1048          lpc->lpc_xmap = malloc(st.st_size);
1049 1049          if (lpc->lpc_xmap == NULL) {
1050 1050                  debug("cannot malloc() %ld bytes for xmap", st.st_size);
1051 1051                  (void) rfd_close(lpc->lpc_xmap_fd);
1052 1052                  lpc->lpc_xmap_fd = -1;
1053 1053                  return (-1);
1054 1054          }
1055 1055  
1056 1056          if ((res = pread(lpc->lpc_xmap_fd, lpc->lpc_xmap, st.st_size, 0)) !=
1057 1057              st.st_size) {
1058 1058                  free(lpc->lpc_xmap);
1059 1059                  lpc->lpc_xmap = NULL;
1060 1060                  if (res > 0) {
1061 1061                          debug("xmap changed size, retrying\n");
1062 1062                          goto redo;
1063 1063                  } else {
1064 1064                          debug("cannot read xmap");
1065 1065                          return (-1);
1066 1066                  }
1067 1067          }
1068 1068          lpc->lpc_nxmap = st.st_size / sizeof (*lpc->lpc_xmap);
1069 1069  
1070 1070          return (0);
1071 1071  }
  
    | 
      ↓ open down ↓ | 
    1071 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX