Print this page
    
OS-881 To workaround OS-580 add support to only invalidate mappings from a single process
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/cmd/rcap/rcapd/rcapd_scanner.c
          +++ new/usr/src/cmd/rcap/rcapd/rcapd_scanner.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  
    | 
      ↓ open down ↓ | 
    13 lines elided | 
    
      ↑ open up ↑ | 
  
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
       24 + * Copyright 2012 Joyent, Inc.  All rights reserved.
  24   25   */
  25   26  
  26   27  #pragma ident   "%Z%%M% %I%     %E% SMI"
  27   28  
  28   29  #include <sys/mman.h>
  29   30  #include <sys/param.h>
  30   31  #include <sys/stat.h>
  31   32  #include <sys/types.h>
  32   33  #include <assert.h>
  33   34  #include <errno.h>
  34   35  #include <fcntl.h>
  35   36  #include <libproc.h>
  36   37  #include <limits.h>
  37   38  #include <procfs.h>
  38   39  #include <stdio.h>
  39   40  #include <stdlib.h>
  40   41  #include <strings.h>
  41   42  #include <time.h>
  42   43  #include <unistd.h>
  43   44  #include "rcapd.h"
  44   45  #include "rcapd_rfd.h"
  45   46  #include "rcapd_mapping.h"
  46   47  #include "utils.h"
  47   48  
  48   49  static int lpc_xmap_update(lprocess_t *);
  49   50  #ifdef DEBUG
  50   51  extern int lmapping_dump_diff(lmapping_t *lm1, lmapping_t *lm2);
  51   52  #endif /* DEBUG */
  52   53  
  53   54  /*
  54   55   * The number of file descriptors required to grab a process and create an
  55   56   * agent in it.
  56   57   */
  57   58  #define PGRAB_FD_COUNT          10
  58   59  
  59   60  /*
  60   61   * Record a position in an address space as it corresponds to a prpageheader_t
  61   62   * and affiliated structures.
  62   63   */
  63   64  typedef struct prpageheader_cur {
  64   65          int pr_nmap;            /* number of mappings in address space */
  65   66          int pr_map;             /* number of this mapping */
  66   67          uint64_t pr_pgoff;      /* page offset into mapping */
  67   68          uint64_t pr_npage;      /* number of pages in mapping */
  68   69          uint64_t pr_pagesize;   /* page size of mapping */
  69   70          uintptr_t pr_addr;      /* base of mapping */
  70   71          prpageheader_t *pr_prpageheader;        /* associated page header */
  71   72          void *pr_pdaddr;        /* address of page's byte in pagedata */
  72   73          prxmap_t *pr_xmap;      /* array containing per-segment information */
  73   74          int pr_nxmap;           /* number of xmaps in array */
  74   75          int64_t pr_rss;         /* number of resident pages in mapping, */
  75   76                                  /* or -1 if xmap is out of sync */
  76   77          int64_t pr_pg_rss;      /* number of pageable pages in mapping, or -1 */
  77   78  } prpageheader_cur_t;
  78   79  
  79   80  static struct ps_prochandle *scan_pr;   /* currently-scanned process's handle */
  80   81  
  81   82  typedef enum {
  82   83          STDL_NORMAL,
  83   84          STDL_HIGH
  84   85  } st_debug_level_t;
  85   86  
/*
 * Output a scanning-related debug message.
 *
 * level: verbosity class; STDL_HIGH messages are emitted only when the
 *        daemon's message priority is at least RCM_DEBUG_HIGH.
 * lcol:  collection whose type ("project"/"zone") and name prefix the message.
 * msg:   printf-style format; the variadic arguments follow.
 *
 * Compiled to a no-op unless DEBUG_MSG is defined.  Best-effort: the message
 * is silently dropped if the formatting buffer cannot be allocated.
 */
/*PRINTFLIKE3*/ /*ARGSUSED*/
static void
st_debug(st_debug_level_t level, lcollection_t *lcol, char *msg, ...)
{
#ifdef DEBUG_MSG
	va_list alist;
	char *buf;
	size_t len;

	/* Suppress messages below the configured priority threshold. */
	if (get_message_priority() < ((level == STDL_HIGH) ? RCM_DEBUG_HIGH
	    : RCM_DEBUG))
		return;

	/* LINELEN covers the collection type/name prefix added below. */
	len = strlen(msg) + LINELEN;
	buf = malloc(len);
	if (buf == NULL)
		return;
	(void) snprintf(buf, len, "%s %s scanner %s",
	    (lcol->lcol_id.rcid_type == RCIDT_PROJECT ? "project" : "zone"),
	    lcol->lcol_name, msg);

	va_start(alist, msg);
	vdprintfe(RCM_DEBUG, buf, alist);
	va_end(alist);

	free(buf);
#endif /* DEBUG_MSG */
}
 117  118  
 118  119  /*
 119  120   * Determine the collection's current victim, based on its last.  The last will
 120  121   * be returned, or, if invalid, any other valid process, if the collection has
 121  122   * any.
 122  123   */
 123  124  static lprocess_t *
 124  125  get_valid_victim(lcollection_t *lcol, lprocess_t *lpc)
 125  126  {
 126  127          if (lpc == NULL || !lcollection_member(lcol, lpc))
 127  128                  lpc = lcol->lcol_lprocess;
 128  129  
 129  130          /*
 130  131           * Find the next scannable process, and make it the victim.
 131  132           */
 132  133          while (lpc != NULL && lpc->lpc_unscannable != 0)
 133  134                  lpc = lpc->lpc_next;
 134  135  
 135  136          return (lpc);
 136  137  }
 137  138  
 138  139  /*
 139  140   * Get a process's combined current pagedata (per-page referenced and modified
 140  141   * bits) and set the supplied pointer to it.  The caller is responsible for
 141  142   * freeing the data.  If the pagedata is unreadable, a nonzero value is
 142  143   * returned, and errno is set.  Otherwise, 0 is returned.
 143  144   */
 144  145  static int
 145  146  get_pagedata(prpageheader_t **pghpp, int fd)
 146  147  {
 147  148          int res;
 148  149          struct stat st;
 149  150  
 150  151  redo:
 151  152          errno = 0;
 152  153          if (fstat(fd, &st) != 0) {
 153  154                  debug("cannot stat pagedata\n");
 154  155                  return (-1);
 155  156          }
 156  157  
 157  158          errno = 0;
 158  159          *pghpp = malloc(st.st_size);
 159  160          if (*pghpp == NULL) {
 160  161                  debug("cannot malloc() %ld bytes for pagedata", st.st_size);
 161  162                  return (-1);
 162  163          }
 163  164          (void) bzero(*pghpp, st.st_size);
 164  165  
 165  166          errno = 0;
 166  167          if ((res = read(fd, *pghpp, st.st_size)) != st.st_size) {
 167  168                  free(*pghpp);
 168  169                  *pghpp = NULL;
 169  170                  if (res > 0 || errno == E2BIG) {
 170  171                          debug("pagedata changed size, retrying\n");
 171  172                          goto redo;
 172  173                  } else {
 173  174                          debug("cannot read pagedata");
 174  175                          return (-1);
 175  176                  }
 176  177          }
 177  178  
 178  179          return (0);
 179  180  }
 180  181  
 181  182  /*
 182  183   * Return the count of kilobytes of pages represented by the given pagedata
 183  184   * which meet the given criteria, having pages which are in all of the states
 184  185   * specified by the mask, and in none of the states in the notmask.  If the
 185  186   * CP_CLEAR flag is set, the pagedata will also be cleared.
 186  187   */
 187  188  #define CP_CLEAR        1
 188  189  static uint64_t
 189  190  count_pages(prpageheader_t *pghp, int flags, int mask, int notmask)
 190  191  {
 191  192          int map;
 192  193          caddr_t cur, end;
 193  194          prpageheader_t pgh = *pghp;
 194  195          prasmap_t *asmapp;
 195  196          uint64_t count = 0;
 196  197  
 197  198          cur = (caddr_t)pghp + sizeof (*pghp);
 198  199          for (map = 0; map < pgh.pr_nmap; map++) {
 199  200                  asmapp = (prasmap_t *)(uintptr_t)cur;
 200  201                  cur += sizeof (*asmapp);
 201  202                  end = cur + asmapp->pr_npage;
 202  203                  while (cur < end) {
 203  204                          if ((*cur & mask) == mask && (*cur & notmask) == 0)
 204  205                                  count += asmapp->pr_pagesize / 1024;
 205  206                          if ((flags & CP_CLEAR) != 0)
 206  207                                  *cur = 0;
 207  208                          cur++;
 208  209                  }
 209  210  
 210  211                  /*
 211  212                   * Skip to next 64-bit-aligned address to get the next
 212  213                   * prasmap_t.
 213  214                   */
 214  215                  cur = (caddr_t)((intptr_t)(cur + 7) & ~7);
 215  216          }
 216  217  
 217  218          return (count);
 218  219  }
 219  220  
 220  221  /*
 221  222   * Return the amount of memory (in kilobytes) that hasn't been referenced or
 222  223   * modified, which memory which will be paged out first.  Should be written to
 223  224   * exclude nonresident pages when sufficient interfaces exist.
 224  225   */
 225  226  static uint64_t
 226  227  unrm_size(lprocess_t *lpc)
 227  228  {
 228  229          return (count_pages(lpc->lpc_prpageheader, CP_CLEAR,
 229  230              0, PG_MODIFIED | PG_REFERENCED));
 230  231  }
 231  232  
/*
 * Advance a prpageheader_cur_t to the address space's next mapping, returning
 * its address, or NULL if there is none.  Any known nonpageable or nonresident
 * mappings will be skipped over.
 *
 * On return, the cursor's per-mapping fields (pr_addr, pr_npage, pr_pagesize,
 * pr_pdaddr, pr_rss, pr_pg_rss) describe the mapping returned.  pr_rss and
 * pr_pg_rss are -1 if no matching xmap entry was found (xmap out of sync).
 */
static uintptr_t
advance_prpageheader_cur_nextmapping(prpageheader_cur_t *pcp)
{
	prasmap_t *pap;
	int i;

next:
	ASSERT(pcp->pr_map < pcp->pr_nmap);
	if ((pcp->pr_map + 1) == pcp->pr_nmap)
		return (NULL);
	pcp->pr_map++;
	/*
	 * If the current mapping was not walked to its end, skip over its
	 * remaining pagedata bytes (one byte per page) to reach the next
	 * prasmap_t.
	 */
	if (pcp->pr_pgoff < pcp->pr_npage) {
		pcp->pr_pdaddr = (caddr_t)(uintptr_t)
		    ((uintptr_t)pcp->pr_pdaddr +
		    (pcp->pr_npage - pcp->pr_pgoff));
		pcp->pr_pgoff = pcp->pr_npage;
	}
	/*
	 * Skip to next 64-bit-aligned address to get the next prasmap_t.
	 */
	pcp->pr_pdaddr = (caddr_t)(((uintptr_t)pcp->pr_pdaddr + 7) & ~7);
	pap = (prasmap_t *)pcp->pr_pdaddr;
	pcp->pr_pgoff = 0;
	pcp->pr_npage = pap->pr_npage;
	pcp->pr_pagesize = pap->pr_pagesize;
	pcp->pr_addr = pap->pr_vaddr;
	/* The per-page status bytes immediately follow the prasmap_t. */
	pcp->pr_pdaddr = pap + 1;

	/*
	 * Skip any known nonpageable mappings.  Currently, the only one
	 * detected is the schedctl page.
	 */
	if ((pap->pr_mflags ^ (MA_SHARED | MA_READ | MA_WRITE | MA_EXEC |
	    MA_ANON)) == 0 && pap->pr_npage == 1) {
		debug("identified nonpageable schedctl mapping at %p\n",
		    (void *)pcp->pr_addr);
		goto next;
	}

	/*
	 * Skip mappings with no resident pages.  If the xmap does not
	 * correspond to the pagedata for any reason, it will be ignored.
	 */
	pcp->pr_rss = -1;
	pcp->pr_pg_rss = -1;
	for (i = 0; i < pcp->pr_nxmap; i++) {
		prxmap_t *xmap = &pcp->pr_xmap[i];

		/* Match requires identical base address and extent. */
		if (pcp->pr_addr == xmap->pr_vaddr && xmap->pr_size ==
		    (pcp->pr_npage * pcp->pr_pagesize)) {
			pcp->pr_rss = xmap->pr_rss;
			/*
			 * Remove COW pages from the pageable RSS count.
			 */
			if ((xmap->pr_mflags & MA_SHARED) == 0)
				pcp->pr_pg_rss = xmap->pr_anon;
			break;
		}
	}
	if (pcp->pr_rss == 0) {
		debug("identified nonresident mapping at 0x%p\n",
		    (void *)pcp->pr_addr);
		goto next;
	} else if (pcp->pr_pg_rss == 0) {
		debug("identified unpageable mapping at 0x%p\n",
		    (void *)pcp->pr_addr);
		goto next;
	}

	return (pcp->pr_addr);
}
 308  309  
 309  310  /*
 310  311   * Advance a prpageheader_cur_t to the mapping's next page, returning its
 311  312   * address, or NULL if there is none.
 312  313   */
 313  314  static void *
 314  315  advance_prpageheader_cur(prpageheader_cur_t *pcp)
 315  316  {
 316  317          ASSERT(pcp->pr_pgoff < pcp->pr_npage);
 317  318          if ((pcp->pr_pgoff + 1) == pcp->pr_npage)
 318  319                  return (NULL);
 319  320          pcp->pr_pdaddr = (caddr_t)pcp->pr_pdaddr + 1;
 320  321          pcp->pr_pgoff++;
 321  322  
 322  323          ASSERT((*(char *)pcp->pr_pdaddr & ~(PG_MODIFIED | PG_REFERENCED)) == 0);
 323  324          return ((caddr_t)pcp->pr_addr + pcp->pr_pgoff * pcp->pr_pagesize);
 324  325  }
 325  326  
 326  327  /*
 327  328   * Initialize a prpageheader_cur_t, positioned at the first page of the mapping
 328  329   * of an address space.
 329  330   */
 330  331  static void *
 331  332  set_prpageheader_cur(prpageheader_cur_t *pcp, prpageheader_t *php,
 332  333      prxmap_t *xmap, int nxmap)
 333  334  {
 334  335          bzero(pcp, sizeof (*pcp));
 335  336          pcp->pr_nmap = php->pr_nmap;
 336  337          pcp->pr_map = -1;
 337  338          pcp->pr_prpageheader = php;
 338  339          pcp->pr_xmap = xmap;
 339  340          pcp->pr_nxmap = nxmap;
 340  341          pcp->pr_pdaddr = (prpageheader_t *)php + 1;
 341  342  
 342  343          return ((void *)advance_prpageheader_cur_nextmapping(pcp));
 343  344  }
 344  345  
/*
 * Position a prpageheader_cur_t to the mapped address greater or equal to the
 * given value.  Returns the resulting position, or NULL if naddr lies beyond
 * the last mapping.
 */
static void *
set_prpageheader_cur_addr(prpageheader_cur_t *pcp, prpageheader_t *php,
    prxmap_t *xmap, int nxmap, void *naddr)
{
	void *addr = set_prpageheader_cur(pcp, php, xmap, nxmap);

	while (addr != NULL && addr <= naddr)
		if (naddr < (void *)((caddr_t)pcp->pr_addr +
		    pcp->pr_pagesize * pcp->pr_npage)) {
			/*
			 * naddr falls within the current mapping; step the
			 * cursor forward page-by-page (one pagedata byte per
			 * page) to the page containing it.
			 */
			uint64_t pgdiff = ((uintptr_t)naddr -
			    (uintptr_t)pcp->pr_addr) / pcp->pr_pagesize;
			pcp->pr_pgoff += pgdiff;
			pcp->pr_pdaddr = (caddr_t)pcp->pr_pdaddr + pgdiff;
			addr = (caddr_t)pcp->pr_addr + pcp->pr_pagesize *
			    pcp->pr_pgoff;
			break;
		} else
			/* naddr is past this mapping; try the next one. */
			addr =
			    (void *)advance_prpageheader_cur_nextmapping(pcp);

	return (addr);
}
 371  372  
 372  373  static void
 373  374  revoke_pagedata(rfd_t *rfd)
 374  375  {
 375  376          lprocess_t *lpc = rfd->rfd_data;
 376  377  
 377  378          st_debug(STDL_NORMAL, lpc->lpc_collection, "revoking pagedata for"
 378  379              " process %d\n", (int)lpc->lpc_pid);
 379  380          ASSERT(lpc->lpc_pgdata_fd != -1);
 380  381          lpc->lpc_pgdata_fd = -1;
 381  382  }
 382  383  
 383  384  #ifdef DEBUG
 384  385  static void
 385  386  mklmapping(lmapping_t **lm, prpageheader_t *pgh)
 386  387  {
 387  388          prpageheader_cur_t cur;
 388  389          void *addr;
 389  390  
 390  391          addr = set_prpageheader_cur(&cur, pgh, NULL, -1);
 391  392          ASSERT(*lm == NULL);
 392  393          while (addr != NULL) {
 393  394                  (void) lmapping_insert(lm, cur.pr_addr, cur.pr_npage *
 394  395                      cur.pr_pagesize);
 395  396                  addr = (void *)advance_prpageheader_cur_nextmapping(&cur);
 396  397          }
 397  398  }
 398  399  
 399  400  static void
 400  401  lmapping_dump(lmapping_t *lm)
 401  402  {
 402  403          debug("lm: %p\n", (void *)lm);
 403  404          while (lm != NULL) {
 404  405                  debug("\t(%p, %llx\n", (void *)lm->lm_addr,
 405  406                      (unsigned long long)lm->lm_size);
 406  407                  lm = lm->lm_next;
 407  408          }
 408  409  }
 409  410  #endif /* DEBUG */
 410  411  
/*
 * OR two prpagedata_t which are supposedly snapshots of the same address
 * space.  Intersecting mappings with different page sizes are tolerated but
 * not normalized (not accurate).  If the mappings of the two snapshots differ
 * in any regard, the supplied mappings_changed flag will be set.
 */
static void
OR_pagedata(prpageheader_t *src, prpageheader_t *dst, int *mappings_changedp)
{
	prpageheader_cur_t src_cur;
	prpageheader_cur_t dst_cur;
	uintptr_t src_addr;
	uintptr_t dst_addr;
	int mappings_changed = 0;

	/*
	 * OR source pagedata with the destination, for pages of intersecting
	 * mappings.
	 */
	src_addr = (uintptr_t)set_prpageheader_cur(&src_cur, src, NULL, -1);
	dst_addr = (uintptr_t)set_prpageheader_cur(&dst_cur, dst, NULL, -1);
	while (src_addr != NULL && dst_addr != NULL) {
		/* Walk pages in lockstep while both cursors agree. */
		while (src_addr == dst_addr && src_addr != NULL) {
			*(char *)dst_cur.pr_pdaddr |=
			    *(char *)src_cur.pr_pdaddr;
			src_addr = (uintptr_t)advance_prpageheader_cur(
			    &src_cur);
			dst_addr = (uintptr_t)advance_prpageheader_cur(
			    &dst_cur);
		}
		/*
		 * The cursors diverged mid-mapping: the mappings differ in
		 * extent (both hitting end-of-mapping yields NULL == NULL).
		 */
		if (src_addr != dst_addr)
			mappings_changed = 1;
		src_addr = advance_prpageheader_cur_nextmapping(&src_cur);
		dst_addr = advance_prpageheader_cur_nextmapping(&dst_cur);
		/*
		 * Advance the lagging cursor until the two re-align on a
		 * common mapping (or either snapshot is exhausted).
		 */
		while (src_addr != dst_addr && src_addr != NULL && dst_addr !=
		    NULL) {
			mappings_changed = 1;
			if (src_addr < dst_addr)
				src_addr = advance_prpageheader_cur_nextmapping(
				    &src_cur);
			else
				dst_addr = advance_prpageheader_cur_nextmapping(
				    &dst_cur);
		}
	}

	*mappings_changedp = mappings_changed;
}
 459  460  
/*
 * Merge the current pagedata with that on hand.  If the pagedata is
 * unretrievable for any reason, such as the process having exited or being a
 * zombie, a nonzero value is returned, the process should be marked
 * unscannable, and future attempts to scan it should be avoided, since the
 * symptom is probably permanent.  If the mappings of either pagedata
 * differ in any respect, the supplied callback will be invoked once.
 */
static int
merge_current_pagedata(lprocess_t *lpc,
    void(*mappings_changed_cb) (lprocess_t *))
{
	prpageheader_t *pghp;
	int mappings_changed = 0;
	uint64_t cnt;

	/*
	 * (Re)open /proc/<pid>/pagedata if we have no descriptor or the read
	 * failed; opening it (re)starts the kernel's R/M-bit collection.
	 */
	if (lpc->lpc_pgdata_fd < 0 || get_pagedata(&pghp, lpc->lpc_pgdata_fd) !=
	    0) {
		char pathbuf[PROC_PATH_MAX];

		(void) snprintf(pathbuf, sizeof (pathbuf), "/proc/%d/pagedata",
		    (int)lpc->lpc_pid);
		if ((lpc->lpc_pgdata_fd = rfd_open(pathbuf, 1, RFD_PAGEDATA,
		    revoke_pagedata, lpc, O_RDONLY, 0)) < 0 ||
		    get_pagedata(&pghp, lpc->lpc_pgdata_fd) != 0)
			return (-1);
		debug("starting/resuming pagedata collection for %d\n",
		    (int)lpc->lpc_pid);
	}

	/* Report the referenced/modified total seen in this snapshot. */
	cnt = count_pages(pghp, 0, PG_MODIFIED | PG_REFERENCED, 0);
	if (cnt != 0 || lpc->lpc_rss != 0)
		debug("process %d: %llu/%llukB rfd/mdfd since last read\n",
		    (int)lpc->lpc_pid, (unsigned long long)cnt,
		    (unsigned long long)lpc->lpc_rss);
	if (lpc->lpc_prpageheader != NULL) {
		/*
		 * OR the two snapshots.
		 */
#ifdef DEBUG
		lmapping_t *old = NULL;
		lmapping_t *new = NULL;

		mklmapping(&new, pghp);
		mklmapping(&old, lpc->lpc_prpageheader);
#endif /* DEBUG */
		OR_pagedata(lpc->lpc_prpageheader, pghp, &mappings_changed);
#ifdef DEBUG
		/*
		 * Cross-check OR_pagedata's change detection against an
		 * independent mapping-list diff.
		 */
		if (((mappings_changed != 0) ^
		    (lmapping_dump_diff(old, new) != 0))) {
			debug("lmapping_changed inconsistent with lmapping\n");
			debug("old\n");
			lmapping_dump(old);
			debug("new\n");
			lmapping_dump(new);
			debug("ignored\n");
			lmapping_dump(lpc->lpc_ignore);
			ASSERT(0);
		}
		lmapping_free(&new);
		lmapping_free(&old);
#endif /* DEBUG */
		free(lpc->lpc_prpageheader);
	} else
		/* First snapshot for this process: treat mappings as new. */
		mappings_changed = 1;
	lpc->lpc_prpageheader = pghp;

	cnt = count_pages(pghp, 0, PG_MODIFIED | PG_REFERENCED, 0);
	if (cnt != 0 || lpc->lpc_rss != 0)
		debug("process %d: %llu/%llukB rfd/mdfd since hand swept\n",
		    (int)lpc->lpc_pid, (unsigned long long)cnt,
		    (unsigned long long)lpc->lpc_rss);
	if (mappings_changed != 0) {
		debug("process %d: mappings changed\n", (int)lpc->lpc_pid);
		if (mappings_changed_cb != NULL)
			mappings_changed_cb(lpc);
	}
	return (0);
}
 539  540  
/*
 * Attempt to page out a region of the given process's address space.  May
 * return nonzero if not all of the pages are pageable, for any reason.
 */
  
    | 
      ↓ open down ↓ | 
    510 lines elided | 
    
      ↑ open up ↑ | 
  
 544  545  static int
 545  546  pageout(pid_t pid, struct ps_prochandle *Pr, caddr_t start, caddr_t end)
 546  547  {
 547  548          int res;
 548  549  
 549  550          if (end <= start)
 550  551                  return (0);
 551  552  
 552  553          errno = 0;
 553  554          res = pr_memcntl(Pr, start, (end - start), MC_SYNC,
 554      -            (caddr_t)(MS_ASYNC | MS_INVALIDATE), 0, 0);
      555 +            (caddr_t)(MS_ASYNC | MS_INVALCURPROC), 0, 0);
 555  556          debug_high("pr_memcntl [%p-%p): %d", (void *)start, (void *)end, res);
 556  557  
 557  558          /*
 558  559           * EBUSY indicates none of the pages have backing store allocated, or
 559  560           * some pages were locked, which are less interesting than other
 560  561           * conditions, which are noted.
 561  562           */
 562  563          if (res != 0)
 563  564                  if (errno == EBUSY)
 564  565                          res = 0;
 565  566                  else
 566  567                          debug("%d: can't pageout %p+%llx (errno %d)", (int)pid,
 567  568                              (void *)start, (long long)(end - start), errno);
 568  569  
 569  570          return (res);
 570  571  }
 571  572  
 572  573  /*
 573  574   * Compute the delta of the victim process's RSS since the last call.  If the
 574  575   * psinfo cannot be obtained, no work is done, and no error is returned; it is
 575  576   * up to the caller to detect the process' termination via other means.
 576  577   */
 577  578  static int64_t
 578  579  rss_delta(psinfo_t *new_psinfo, psinfo_t *old_psinfo, lprocess_t *vic)
 579  580  {
 580  581          int64_t d_rss = 0;
 581  582  
 582  583          if (get_psinfo(vic->lpc_pid, new_psinfo, vic->lpc_psinfo_fd,
 583  584              lprocess_update_psinfo_fd_cb, vic, vic) == 0) {
 584  585                  d_rss = (int64_t)new_psinfo->pr_rssize -
 585  586                      (int64_t)old_psinfo->pr_rssize;
 586  587                  if (d_rss < 0)
 587  588                          vic->lpc_collection->lcol_stat.lcols_pg_eff +=
 588  589                              (- d_rss);
 589  590                  *old_psinfo = *new_psinfo;
 590  591          }
 591  592  
 592  593          return (d_rss);
 593  594  }
 594  595  
/*
 * Discard the process's entire ignored-mapping set, so previously-unpageable
 * mappings will be retried on the next scan.
 */
static void
unignore_mappings(lprocess_t *lpc)
{
	lmapping_free(&lpc->lpc_ignore);
}
 600  601  
/*
 * Remove from the process's ignored set any mapping containing a referenced
 * or modified page, since such usage suggests the mapping may have become
 * pageable again.
 */
static void
unignore_referenced_mappings(lprocess_t *lpc)
{
	prpageheader_cur_t cur;
	void *vicaddr;

	vicaddr = set_prpageheader_cur(&cur, lpc->lpc_prpageheader, NULL, -1);
	while (vicaddr != NULL) {
		if (((*(char *)cur.pr_pdaddr) & (PG_REFERENCED | PG_MODIFIED))
		    != 0) {
			/*
			 * One touched page is enough: un-ignore the whole
			 * mapping and skip directly to the next one.
			 */
			if (lmapping_remove(&lpc->lpc_ignore, cur.pr_addr,
			    cur.pr_npage * cur.pr_pagesize) == 0)
				debug("removed mapping 0x%p+0t%llukB from"
				    " ignored set\n", (void *)cur.pr_addr,
				    (unsigned long long)(cur.pr_npage *
				    cur.pr_pagesize / 1024));
			vicaddr = (void *)advance_prpageheader_cur_nextmapping(
			    &cur);
		} else if ((vicaddr = advance_prpageheader_cur(&cur)) == NULL)
			/* End of this mapping's pages; move to the next. */
			vicaddr = (void *)advance_prpageheader_cur_nextmapping(
			    &cur);
	}
}
 624  625  
 625  626  /*
 626  627   * Resume scanning, starting with the last victim, if it is still valid, or any
 627  628   * other one, otherwise.
 628  629   */
 629  630  void
 630  631  scan(lcollection_t *lcol, int64_t excess)
 631  632  {
 632  633          lprocess_t *vic, *lpc;
 633  634          void *vicaddr, *endaddr, *nvicaddr;
 634  635          prpageheader_cur_t cur;
 635  636          psinfo_t old_psinfo, new_psinfo;
 636  637          hrtime_t scan_start;
 637  638          int res, resumed;
 638  639          uint64_t col_unrm_size;
 639  640  
 640  641          st_debug(STDL_NORMAL, lcol, "starting to scan, excess %lldk\n",
 641  642              (long long)excess);
 642  643  
 643  644          /*
 644  645           * Determine the address to start scanning at, depending on whether
 645  646           * scanning can be resumed.
 646  647           */
 647  648          endaddr = NULL;
 648  649          if ((vic = get_valid_victim(lcol, lcol->lcol_victim)) ==
 649  650              lcol->lcol_victim && lcol->lcol_resaddr != NULL) {
 650  651                  vicaddr = lcol->lcol_resaddr;
 651  652                  st_debug(STDL_NORMAL, lcol, "resuming process %d\n",
 652  653                      (int)vic->lpc_pid);
 653  654                  resumed = 1;
 654  655          } else {
 655  656                  vicaddr = NULL;
 656  657                  resumed = 0;
 657  658          }
 658  659  
 659  660          scan_start = gethrtime();
 660  661          /*
 661  662           * Obtain the most current pagedata for the processes that might be
 662  663           * scanned, and remove from the ignored set any mappings which have
 663  664           * referenced or modified pages (in the hopes that the pageability of
 664  665           * the mapping's pages may have changed).  Determine if the
 665  666           * unreferenced and unmodified portion is impossibly small to suffice
 666  667           * to reduce the excess completely.  If so, ignore these bits so that
 667  668           * even working set will be paged out.
 668  669           */
 669  670          col_unrm_size = 0;
 670  671          lpc = vic;
 671  672          while (lpc != NULL && should_run) {
 672  673                  if (merge_current_pagedata(lpc, unignore_mappings) != 0) {
 673  674                          st_debug(STDL_NORMAL, lcol, "process %d:"
 674  675                              " exited/temporarily unscannable",
 675  676                              (int)lpc->lpc_pid);
 676  677                          goto next;
 677  678                  }
 678  679                  debug("process %d: %llu/%llukB scannable\n", (int)lpc->lpc_pid,
 679  680                      (unsigned long long)(lpc->lpc_unrm = unrm_size(lpc)),
 680  681                      (unsigned long long)lpc->lpc_size);
 681  682                  col_unrm_size += lpc->lpc_unrm = unrm_size(lpc);
 682  683  
 683  684                  if ((lcol->lcol_stat.lcols_scan_count %
 684  685                      RCAPD_IGNORED_SET_FLUSH_IVAL) == 0) {
 685  686                          /*
 686  687                           * Periodically clear the set of ignored mappings.
 687  688                           * This will allow processes whose ignored segments'
 688  689                           * pageability have changed (without a corresponding
 689  690                           * reference or modification to a page) to be
 690  691                           * recognized.
 691  692                           */
 692  693                          if (lcol->lcol_stat.lcols_scan_count > 0)
 693  694                                  unignore_mappings(lpc);
 694  695                  } else {
 695  696                          /*
 696  697                           * Ensure mappings with referenced or modified pages
 697  698                           * are not in the ignored set.  Their usage might mean
 698  699                           * the condition which made them unpageable is gone.
 699  700                           */
 700  701                          unignore_referenced_mappings(lpc);
 701  702                  }
 702  703  next:
 703  704                  lpc = lpc->lpc_next != NULL ? get_valid_victim(lcol,
 704  705                      lpc->lpc_next) : NULL;
 705  706          }
 706  707          if (col_unrm_size < excess) {
 707  708                  lpc = vic;
 708  709                  debug("will not reduce excess with only unreferenced pages\n");
 709  710                  while (lpc != NULL && should_run) {
 710  711                          if (lpc->lpc_prpageheader != NULL) {
 711  712                                  (void) count_pages(lpc->lpc_prpageheader,
 712  713                                      CP_CLEAR, 0, 0);
 713  714                                  if (lpc->lpc_pgdata_fd >= 0) {
 714  715                                          if (rfd_close(lpc->lpc_pgdata_fd) != 0)
 715  716                                                  debug("coud not close %d"
 716  717                                                      " lpc_pgdata_fd %d",
 717  718                                                      (int)lpc->lpc_pid,
 718  719                                                      lpc->lpc_pgdata_fd);
 719  720                                          lpc->lpc_pgdata_fd = -1;
 720  721                                  }
 721  722                          }
 722  723                          lpc = lpc->lpc_next != NULL ? get_valid_victim(lcol,
 723  724                              lpc->lpc_next) : NULL;
 724  725                  }
 725  726          }
 726  727  
 727  728          /*
 728  729           * Examine each process for pages to remove until the excess is
 729  730           * reduced.
 730  731           */
 731  732          while (vic != NULL && excess > 0 && should_run) {
 732  733                  /*
 733  734                   * Skip processes whose death was reported when the merging of
 734  735                   * pagedata was attempted.
 735  736                   */
 736  737                  if (vic->lpc_prpageheader == NULL)
 737  738                          goto nextproc;
 738  739  
 739  740                  /*
 740  741                   * Obtain optional segment residency information.
 741  742                   */
 742  743                  if (lpc_xmap_update(vic) != 0)
 743  744                          st_debug(STDL_NORMAL, lcol, "process %d: xmap"
 744  745                              " unreadable; ignoring", (int)vic->lpc_pid);
 745  746  
 746  747  #ifdef DEBUG_MSG
 747  748                  {
 748  749                          void *ovicaddr = vicaddr;
 749  750  #endif /* DEBUG_MSG */
 750  751                  vicaddr = set_prpageheader_cur_addr(&cur, vic->lpc_prpageheader,
 751  752                      vic->lpc_xmap, vic->lpc_nxmap, vicaddr);
 752  753  #ifdef DEBUG_MSG
 753  754                          st_debug(STDL_NORMAL, lcol, "trying to resume from"
 754  755                              " 0x%p, next 0x%p\n", ovicaddr, vicaddr);
 755  756                  }
 756  757  #endif /* DEBUG_MSG */
 757  758  
 758  759                  /*
 759  760                   * Take control of the victim.
 760  761                   */
 761  762                  if (get_psinfo(vic->lpc_pid, &old_psinfo,
 762  763                      vic->lpc_psinfo_fd, lprocess_update_psinfo_fd_cb,
 763  764                      vic, vic) != 0) {
 764  765                          st_debug(STDL_NORMAL, lcol, "cannot get %d psinfo",
 765  766                              (int)vic->lpc_pid);
 766  767                          goto nextproc;
 767  768                  }
 768  769                  (void) rfd_reserve(PGRAB_FD_COUNT);
 769  770                  if ((scan_pr = Pgrab(vic->lpc_pid, 0, &res)) == NULL) {
 770  771                          st_debug(STDL_NORMAL, lcol, "cannot grab %d (%d)",
 771  772                              (int)vic->lpc_pid, res);
 772  773                          goto nextproc;
 773  774                  }
 774  775                  if (Pcreate_agent(scan_pr) != 0) {
 775  776                          st_debug(STDL_NORMAL, lcol, "cannot control %d",
 776  777                              (int)vic->lpc_pid);
 777  778                          goto nextproc;
 778  779                  }
 779  780                  /*
 780  781                   * Be very pessimistic about the state of the agent LWP --
 781  782                   * verify it's actually stopped.
 782  783                   */
 783  784                  errno = 0;
 784  785                  while (Pstate(scan_pr) == PS_RUN)
 785  786                          (void) Pwait(scan_pr, 0);
 786  787                  if (Pstate(scan_pr) != PS_STOP) {
 787  788                          st_debug(STDL_NORMAL, lcol, "agent not in expected"
 788  789                              " state (%d)", Pstate(scan_pr));
 789  790                          goto nextproc;
 790  791                  }
 791  792  
 792  793                  /*
 793  794                   * Within the victim's address space, find contiguous ranges of
 794  795                   * unreferenced pages to page out.
 795  796                   */
 796  797                  st_debug(STDL_NORMAL, lcol, "paging out process %d\n",
 797  798                      (int)vic->lpc_pid);
 798  799                  while (excess > 0 && vicaddr != NULL && should_run) {
 799  800                          /*
 800  801                           * Skip mappings in the ignored set.  Mappings get
 801  802                           * placed in the ignored set when all their resident
 802  803                           * pages are unreference and unmodified, yet unpageable
 803  804                           * -- such as when they are locked, or involved in
 804  805                           * asynchronous I/O.  They will be scanned again when
 805  806                           * some page is referenced or modified.
 806  807                           */
 807  808                          if (lmapping_contains(vic->lpc_ignore, cur.pr_addr,
 808  809                              cur.pr_npage * cur.pr_pagesize)) {
 809  810                                  debug("ignored mapping at 0x%p\n",
 810  811                                      (void *)cur.pr_addr);
 811  812                                  /*
 812  813                                   * Update statistics.
 813  814                                   */
 814  815                                  lcol->lcol_stat.lcols_pg_att +=
 815  816                                      cur.pr_npage * cur.pr_pagesize / 1024;
 816  817  
 817  818                                  vicaddr = (void *)
 818  819                                      advance_prpageheader_cur_nextmapping(&cur);
 819  820                                  continue;
 820  821                          }
 821  822  
 822  823                          /*
 823  824                           * Determine a range of unreferenced pages to page out,
 824  825                           * and clear the R/M bits in the preceding referenced
 825  826                           * range.
 826  827                           */
 827  828                          st_debug(STDL_HIGH, lcol, "start from mapping at 0x%p,"
 828  829                              " npage %llu\n", vicaddr,
 829  830                              (unsigned long long)cur.pr_npage);
 830  831                          while (vicaddr != NULL &&
 831  832                              *(caddr_t)cur.pr_pdaddr != 0) {
 832  833                                  *(caddr_t)cur.pr_pdaddr = 0;
 833  834                                  vicaddr = advance_prpageheader_cur(&cur);
 834  835                          }
 835  836                          st_debug(STDL_HIGH, lcol, "advance, vicaddr %p, pdaddr"
 836  837                              " %p\n", vicaddr, cur.pr_pdaddr);
 837  838                          if (vicaddr == NULL) {
 838  839                                  /*
 839  840                                   * The end of mapping was reached before any
 840  841                                   * unreferenced pages were seen.
 841  842                                   */
 842  843                                  vicaddr = (void *)
 843  844                                      advance_prpageheader_cur_nextmapping(&cur);
 844  845                                  continue;
 845  846                          }
 846  847                          do
 847  848                                  endaddr = advance_prpageheader_cur(&cur);
 848  849                          while (endaddr != NULL &&
 849  850                              *(caddr_t)cur.pr_pdaddr == 0 &&
 850  851                              (((intptr_t)endaddr - (intptr_t)vicaddr) /
 851  852                                  1024) < excess);
 852  853                          st_debug(STDL_HIGH, lcol, "endaddr %p, *cur %d\n",
 853  854                              endaddr, *(caddr_t)cur.pr_pdaddr);
 854  855  
 855  856                          /*
 856  857                           * Page out from vicaddr to the end of the mapping, or
 857  858                           * endaddr if set, then continue scanning after
 858  859                           * endaddr, or the next mapping, if not set.
 859  860                           */
 860  861                          nvicaddr = endaddr;
 861  862                          if (endaddr == NULL)
 862  863                                  endaddr = (caddr_t)cur.pr_addr +
 863  864                                      cur.pr_pagesize * cur.pr_npage;
 864  865                          if (pageout(vic->lpc_pid, scan_pr, vicaddr, endaddr) ==
 865  866                              0) {
 866  867                                  int64_t d_rss, att;
 867  868                                  int willignore = 0;
 868  869  
 869  870                                  excess += (d_rss = rss_delta(
 870  871                                      &new_psinfo, &old_psinfo, vic));
 871  872  
 872  873                                  /*
 873  874                                   * If this pageout attempt was unsuccessful
 874  875                                   * (the resident portion was not affected), and
 875  876                                   * was for the whole mapping, put it in the
 876  877                                   * ignored set, so it will not be scanned again
 877  878                                   * until some page is referenced or modified.
 878  879                                   */
 879  880                                  if (d_rss >= 0 && (void *)cur.pr_addr ==
 880  881                                      vicaddr && (cur.pr_pagesize * cur.pr_npage)
 881  882                                      == ((uintptr_t)endaddr -
 882  883                                      (uintptr_t)vicaddr)) {
 883  884                                          if (lmapping_insert(
 884  885                                              &vic->lpc_ignore,
 885  886                                              cur.pr_addr,
 886  887                                              cur.pr_pagesize *
 887  888                                              cur.pr_npage) != 0)
 888  889                                                  debug("not enough memory to add"
 889  890                                                      " mapping at %p to ignored"
 890  891                                                      " set\n",
 891  892                                                      (void *)cur.pr_addr);
 892  893                                          willignore = 1;
 893  894                                  }
 894  895  
 895  896                                  /*
 896  897                                   * Update statistics.
 897  898                                   */
 898  899                                  lcol->lcol_stat.lcols_pg_att += (att =
 899  900                                      ((intptr_t)endaddr - (intptr_t)vicaddr) /
 900  901                                      1024);
 901  902                                  st_debug(STDL_NORMAL, lcol, "paged out 0x%p"
 902  903                                      "+0t(%llu/%llu)kB%s\n", vicaddr,
 903  904                                      (unsigned long long)((d_rss <
 904  905                                      0) ? - d_rss : 0), (unsigned long long)att,
 905  906                                      willignore ? " (will ignore)" : "");
 906  907                          } else {
 907  908                                  st_debug(STDL_NORMAL, lcol,
 908  909                                      "process %d: exited/unscannable\n",
 909  910                                      (int)vic->lpc_pid);
 910  911                                  vic->lpc_unscannable = 1;
 911  912                                  goto nextproc;
 912  913                          }
 913  914  
 914  915                          /*
 915  916                           * Update the statistics file, if it's time.
 916  917                           */
 917  918                          check_update_statistics();
 918  919  
 919  920                          vicaddr = (nvicaddr != NULL) ? nvicaddr : (void
 920  921                              *)advance_prpageheader_cur_nextmapping(&cur);
 921  922                  }
 922  923                  excess += rss_delta(&new_psinfo, &old_psinfo, vic);
 923  924                  st_debug(STDL_NORMAL, lcol, "done, excess %lld\n",
 924  925                      (long long)excess);
 925  926  nextproc:
 926  927                  /*
 927  928                   * If a process was grabbed, release it, destroying its agent.
 928  929                   */
 929  930                  if (scan_pr != NULL) {
 930  931                          (void) Prelease(scan_pr, 0);
 931  932                          scan_pr = NULL;
 932  933                  }
 933  934                  lcol->lcol_victim = vic;
 934  935                  /*
 935  936                   * Scan the collection at most once.  Only if scanning was not
 936  937                   * aborted for any reason, and the end of lprocess has not been
 937  938                   * reached, determine the next victim and scan it.
 938  939                   */
 939  940                  if (vic != NULL) {
 940  941                          if (vic->lpc_next != NULL) {
 941  942                                  /*
 942  943                                   * Determine the next process to be scanned.
 943  944                                   */
 944  945                                  if (excess > 0) {
 945  946                                          vic = get_valid_victim(lcol,
 946  947                                              vic->lpc_next);
 947  948                                          vicaddr = 0;
 948  949                                  }
 949  950                          } else {
 950  951                                  /*
 951  952                                   * A complete scan of the collection was made,
 952  953                                   * so tick the scan counter and stop scanning
 953  954                                   * until the next request.
 954  955                                   */
 955  956                                  lcol->lcol_stat.lcols_scan_count++;
 956  957                                  lcol->lcol_stat.lcols_scan_time_complete
 957  958                                      = lcol->lcol_stat.lcols_scan_time;
 958  959                                  /*
 959  960                                   * If an excess still exists, tick the
 960  961                                   * "ineffective scan" counter, signalling that
 961  962                                   * the cap may be uneforceable.
 962  963                                   */
 963  964                                  if (resumed == 0 && excess > 0)
 964  965                                          lcol->lcol_stat
 965  966                                              .lcols_scan_ineffective++;
 966  967                                  /*
 967  968                                   * Scanning should start at the beginning of
 968  969                                   * the process list at the next request.
 969  970                                   */
 970  971                                  if (excess > 0)
 971  972                                          vic = NULL;
 972  973                          }
 973  974                  }
 974  975          }
 975  976          lcol->lcol_stat.lcols_scan_time += (gethrtime() - scan_start);
 976  977          st_debug(STDL_HIGH, lcol, "done scanning; excess %lld\n",
 977  978              (long long)excess);
 978  979  
 979  980          lcol->lcol_resaddr = vicaddr;
 980  981          if (lcol->lcol_resaddr == NULL && lcol->lcol_victim != NULL) {
 981  982                  lcol->lcol_victim = get_valid_victim(lcol,
 982  983                      lcol->lcol_victim->lpc_next);
 983  984          }
 984  985  }
 985  986  
 986  987  /*
 987  988   * Abort the scan in progress, and destroy the agent LWP of any grabbed
 988  989   * processes.
 989  990   */
 990  991  void
 991  992  scan_abort(void)
 992  993  {
 993  994          if (scan_pr != NULL)
 994  995                  (void) Prelease(scan_pr, NULL);
 995  996  }
 996  997  
 997  998  static void
 998  999  revoke_xmap(rfd_t *rfd)
 999 1000  {
1000 1001          lprocess_t *lpc = rfd->rfd_data;
1001 1002  
1002 1003          debug("revoking xmap for process %d\n", (int)lpc->lpc_pid);
1003 1004          ASSERT(lpc->lpc_xmap_fd != -1);
1004 1005          lpc->lpc_xmap_fd = -1;
1005 1006  }
1006 1007  
/*
 * Retrieve the process's current xmap , which is used to determine the size of
 * the resident portion of its segments.  Return zero if successful.
 *
 * The /proc/<pid>/xmap descriptor is cached in lpc_xmap_fd via the
 * revocable-fd mechanism; revoke_xmap() resets it to -1 if the
 * descriptor is reclaimed, in which case it is reopened here.  On
 * success lpc_xmap/lpc_nxmap describe the freshly read map; on failure
 * they are left NULL/-1.
 */
static int
lpc_xmap_update(lprocess_t *lpc)
{
	int res;
	struct stat st;

	/* Discard any previously cached xmap before refreshing. */
	free(lpc->lpc_xmap);
	lpc->lpc_xmap = NULL;
	lpc->lpc_nxmap = -1;

	if (lpc->lpc_xmap_fd == -1) {
		char pathbuf[PROC_PATH_MAX];

		/* (Re)open the xmap file through the revocable-fd cache. */
		(void) snprintf(pathbuf, sizeof (pathbuf), "/proc/%d/xmap",
		    (int)lpc->lpc_pid);
		if ((lpc->lpc_xmap_fd = rfd_open(pathbuf, 1, RFD_XMAP,
		    revoke_xmap, lpc, O_RDONLY, 0)) < 0)
			return (-1);
	}

redo:
	/*
	 * NOTE(review): errno is cleared here but not examined on any
	 * subsequent path — possibly vestigial; confirm before removing.
	 */
	errno = 0;
	if (fstat(lpc->lpc_xmap_fd, &st) != 0) {
		debug("cannot stat xmap\n");
		(void) rfd_close(lpc->lpc_xmap_fd);
		lpc->lpc_xmap_fd = -1;
		return (-1);
	}

	/* The file must be a whole number of xmap entries. */
	if ((st.st_size % sizeof (*lpc->lpc_xmap)) != 0) {
		debug("xmap wrong size\n");
		(void) rfd_close(lpc->lpc_xmap_fd);
		lpc->lpc_xmap_fd = -1;
		return (-1);
	}

	lpc->lpc_xmap = malloc(st.st_size);
	if (lpc->lpc_xmap == NULL) {
		debug("cannot malloc() %ld bytes for xmap", st.st_size);
		(void) rfd_close(lpc->lpc_xmap_fd);
		lpc->lpc_xmap_fd = -1;
		return (-1);
	}

	/*
	 * Read the entire map in one pread().  A short positive read
	 * means the map changed size between fstat() and pread(), so
	 * retry from the top; a zero/negative result is a hard failure
	 * (note the cached fd is intentionally left open on this path —
	 * a later call will fstat() and close it if it has gone bad).
	 */
	if ((res = pread(lpc->lpc_xmap_fd, lpc->lpc_xmap, st.st_size, 0)) !=
	    st.st_size) {
		free(lpc->lpc_xmap);
		lpc->lpc_xmap = NULL;
		if (res > 0) {
			debug("xmap changed size, retrying\n");
			goto redo;
		} else {
			debug("cannot read xmap");
			return (-1);
		}
	}
	lpc->lpc_nxmap = st.st_size / sizeof (*lpc->lpc_xmap);

	return (0);
}
  
    | 
      ↓ open down ↓ | 
    506 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX