Print this page
Revert "OS-8005 bhyve memory pressure needs to target ARC better (#354)"
This reverts commit a6033573eedd94118d2b9e65f45deca0bf4b42f7.


   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2020 Joyent, Inc.
  24  * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
  25  * Copyright (c) 2014 by Saso Kiselkov. All rights reserved.
  26  * Copyright 2017 Nexenta Systems, Inc.  All rights reserved.
  27  * Copyright (c) 2011, 2019, Delphix. All rights reserved.
  28  * Copyright (c) 2020, George Amanakis. All rights reserved.
  29  * Copyright (c) 2020, The FreeBSD Foundation [1]
  30  *
  31  * [1] Portions of this software were developed by Allan Jude
  32  *     under sponsorship from the FreeBSD Foundation.
  33  */
  34 
  35 /*
  36  * DVA-based Adjustable Replacement Cache
  37  *
  38  * While much of the theory of operation used here is
  39  * based on the self-tuning, low overhead replacement cache
  40  * presented by Megiddo and Modha at FAST 2003, there are some
  41  * significant differences:
  42  *
  43  * 1. The Megiddo and Modha model assumes any page is evictable.


4527                 ASSERT((int64_t)arc_p >= 0);
4528         }
4529 
4530         if (asize > arc_c) {
4531                 /* See comment in arc_adjust_cb_check() on why lock+flag */
4532                 mutex_enter(&arc_adjust_lock);
4533                 arc_adjust_needed = B_TRUE;
4534                 mutex_exit(&arc_adjust_lock);
4535                 zthr_wakeup(arc_adjust_zthr);
4536         }
4537 }
4538 
4539 typedef enum free_memory_reason_t {     /* which check in arc_available_memory() produced the lowest value */
4540         FMR_UNKNOWN,            /* starting value, before any check has been applied */
4541         FMR_NEEDFREE,           /* needfree > 0 */
4542         FMR_LOTSFREE,
4543         FMR_SWAPFS_MINFREE,
4544         FMR_PAGES_PP_MAXIMUM,   /* availrmem vs. pages_pp_maximum + arc_pages_pp_reserve */
4545         FMR_HEAP_ARENA,         /* i386 only: free space in heap_arena */
4546         FMR_ZIO_ARENA,
4547         FMR_VIRT_MACHINE,       /* 'VM' seems ambiguous in this context */
4548 } free_memory_reason_t;
4549 
4550 int64_t last_free_memory;
4551 free_memory_reason_t last_free_reason;
4552 
4553 /*
4554  * Additional reserve of pages for pp_reserve.
4555  */
4556 int64_t arc_pages_pp_reserve = 64;
4557 
4558 /*
4559  * Additional reserve of pages for swapfs.
4560  */
4561 int64_t arc_swapfs_reserve = 64;
4562 
4563 static volatile uint64_t arc_virt_machine_reserved;
4564 
4565 /*
4566  * XXX: A possible concern is that we allow arc_virt_machine_reserved to
4567  * get so large that we cause the arc to perform a lot of additional
4568  * work to keep the arc extremely small. We may want to set limits to
4569  * the size of arc_virt_machine_reserved and disallow reservations
4570  * beyond that limit.
4571  */
4572 int
4573 arc_virt_machine_reserve(size_t pages)
4574 {
4575         uint64_t newv;
4576 
4577         newv = atomic_add_64_nv(&arc_virt_machine_reserved, pages);
4578 
4579         /*
4580          * Since arc_virt_machine_reserved effectively lowers arc_c_max
4581          * as needed for vmm memory, if this request would put the arc
4582          * under arc_c_min, we reject it.  arc_c_min should be a value that
4583          * ensures reasonable performance for non-VMM stuff, as well as keep
4584          * us from dipping below lotsfree, which could trigger the pager
4585          * (and send the system toa grinding halt while it pages).
4586          *
4587          * XXX: This is a bit hacky and might be better done w/ a mutex
4588          * instead of atomic ops.
4589          */
4590         if (newv + arc_c_min > arc_c_max) {
4591                 atomic_add_64(&arc_virt_machine_reserved, -(int64_t)pages);
4592                 return (ENOMEM);
4593         }
4594 
4595         zthr_wakeup(arc_reap_zthr);
4596         return (0);
4597 }
4598 
4599 void
4600 arc_virt_machine_release(size_t pages)
4601 {
4602         atomic_add_64(&arc_virt_machine_reserved, -(int64_t)pages);
4603 }
4604 
4605 /*
4606  * Return the amount of memory that can be consumed before reclaim will be
4607  * needed.  Positive if there is sufficient free memory, negative indicates
4608  * the amount of memory that needs to be freed up.
4609  */
4610 static int64_t
4611 arc_available_memory(void)
4612 {
4613         int64_t lowest = INT64_MAX;
4614         int64_t n;
4615         free_memory_reason_t r = FMR_UNKNOWN;
4616 
4617 #ifdef _KERNEL
4618         if (needfree > 0) {
4619                 n = PAGESIZE * (-needfree);
4620                 if (n < lowest) {
4621                         lowest = n;
4622                         r = FMR_NEEDFREE;
4623                 }
4624         }
4625 


4648         if (n < lowest) {
4649                 lowest = n;
4650                 r = FMR_SWAPFS_MINFREE;
4651         }
4652 
4653 
4654         /*
4655          * Check that we have enough availrmem that memory locking (e.g., via
4656          * mlock(3C) or memcntl(2)) can still succeed.  (pages_pp_maximum
4657          * stores the number of pages that cannot be locked; when availrmem
4658          * drops below pages_pp_maximum, page locking mechanisms such as
4659          * page_pp_lock() will fail.)
4660          */
4661         n = PAGESIZE * (availrmem - pages_pp_maximum -
4662             arc_pages_pp_reserve);
4663         if (n < lowest) {
4664                 lowest = n;
4665                 r = FMR_PAGES_PP_MAXIMUM;
4666         }
4667 
4668         /*
4669          * Check that we have enough memory for any virtual machines that
4670          * are running or starting. We add desfree to keep us out of
4671          * particularly dire circumstances.
4672          */
4673         n = PAGESIZE * (availrmem - arc_virt_machine_reserved - desfree);
4674         if (n < lowest) {
4675                 lowest = n;
4676                 r = FMR_VIRT_MACHINE;
4677         }
4678 
4679 #if defined(__i386)
4680         /*
4681          * If we're on an i386 platform, it's possible that we'll exhaust the
4682          * kernel heap space before we ever run out of available physical
4683          * memory.  Most checks of the size of the heap_area compare against
4684          * tune.t_minarmem, which is the minimum available real memory that we
4685          * can have in the system.  However, this is generally fixed at 25 pages
4686          * which is so low that it's useless.  In this comparison, we seek to
4687          * calculate the total heap-size, and reclaim if more than 3/4ths of the
4688          * heap is allocated.  (Or, in the calculation, if less than 1/4th is
4689          * free)
4690          */
4691         n = (int64_t)vmem_size(heap_arena, VMEM_FREE) -
4692             (vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC) >> 2);
4693         if (n < lowest) {
4694                 lowest = n;
4695                 r = FMR_HEAP_ARENA;
4696         }
4697 #endif
4698 




   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2019, Joyent, Inc.
  24  * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
  25  * Copyright (c) 2014 by Saso Kiselkov. All rights reserved.
  26  * Copyright 2017 Nexenta Systems, Inc.  All rights reserved.
  27  * Copyright (c) 2011, 2019, Delphix. All rights reserved.
  28  * Copyright (c) 2020, George Amanakis. All rights reserved.
  29  * Copyright (c) 2020, The FreeBSD Foundation [1]
  30  *
  31  * [1] Portions of this software were developed by Allan Jude
  32  *     under sponsorship from the FreeBSD Foundation.
  33  */
  34 
  35 /*
  36  * DVA-based Adjustable Replacement Cache
  37  *
  38  * While much of the theory of operation used here is
  39  * based on the self-tuning, low overhead replacement cache
  40  * presented by Megiddo and Modha at FAST 2003, there are some
  41  * significant differences:
  42  *
  43  * 1. The Megiddo and Modha model assumes any page is evictable.


4527                 ASSERT((int64_t)arc_p >= 0);
4528         }
4529 
4530         if (asize > arc_c) {
4531                 /* See comment in arc_adjust_cb_check() on why lock+flag */
4532                 mutex_enter(&arc_adjust_lock);
4533                 arc_adjust_needed = B_TRUE;
4534                 mutex_exit(&arc_adjust_lock);
4535                 zthr_wakeup(arc_adjust_zthr);
4536         }
4537 }
4538 
4539 typedef enum free_memory_reason_t {     /* which check in arc_available_memory() produced the lowest value */
4540         FMR_UNKNOWN,            /* starting value, before any check has been applied */
4541         FMR_NEEDFREE,           /* needfree > 0 */
4542         FMR_LOTSFREE,
4543         FMR_SWAPFS_MINFREE,
4544         FMR_PAGES_PP_MAXIMUM,   /* availrmem vs. pages_pp_maximum + arc_pages_pp_reserve */
4545         FMR_HEAP_ARENA,         /* i386 only: free space in heap_arena */
4546         FMR_ZIO_ARENA,

4547 } free_memory_reason_t;
4548 
4549 int64_t last_free_memory;
4550 free_memory_reason_t last_free_reason;
4551 
4552 /*
4553  * Additional reserve of pages for pp_reserve.
4554  */
4555 int64_t arc_pages_pp_reserve = 64;
4556 
4557 /*
4558  * Additional reserve of pages for swapfs.
4559  */
4560 int64_t arc_swapfs_reserve = 64;
4561 


4562 /*








































4563  * Return the amount of memory that can be consumed before reclaim will be
4564  * needed.  Positive if there is sufficient free memory, negative indicates
4565  * the amount of memory that needs to be freed up.
4566  */
4567 static int64_t
4568 arc_available_memory(void)
4569 {
4570         int64_t lowest = INT64_MAX;
4571         int64_t n;
4572         free_memory_reason_t r = FMR_UNKNOWN;
4573 
4574 #ifdef _KERNEL
4575         if (needfree > 0) {
4576                 n = PAGESIZE * (-needfree);
4577                 if (n < lowest) {
4578                         lowest = n;
4579                         r = FMR_NEEDFREE;
4580                 }
4581         }
4582 


4605         if (n < lowest) {
4606                 lowest = n;
4607                 r = FMR_SWAPFS_MINFREE;
4608         }
4609 
4610 
4611         /*
4612          * Check that we have enough availrmem that memory locking (e.g., via
4613          * mlock(3C) or memcntl(2)) can still succeed.  (pages_pp_maximum
4614          * stores the number of pages that cannot be locked; when availrmem
4615          * drops below pages_pp_maximum, page locking mechanisms such as
4616          * page_pp_lock() will fail.)
4617          */
4618         n = PAGESIZE * (availrmem - pages_pp_maximum -
4619             arc_pages_pp_reserve);
4620         if (n < lowest) {
4621                 lowest = n;
4622                 r = FMR_PAGES_PP_MAXIMUM;
4623         }
4624 











4625 #if defined(__i386)
4626         /*
4627          * If we're on an i386 platform, it's possible that we'll exhaust the
4628          * kernel heap space before we ever run out of available physical
4629          * memory.  Most checks of the size of the heap_area compare against
4630          * tune.t_minarmem, which is the minimum available real memory that we
4631          * can have in the system.  However, this is generally fixed at 25 pages
4632          * which is so low that it's useless.  In this comparison, we seek to
4633          * calculate the total heap-size, and reclaim if more than 3/4ths of the
4634          * heap is allocated.  (Or, in the calculation, if less than 1/4th is
4635          * free)
4636          */
4637         n = (int64_t)vmem_size(heap_arena, VMEM_FREE) -
4638             (vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC) >> 2);
4639         if (n < lowest) {
4640                 lowest = n;
4641                 r = FMR_HEAP_ARENA;
4642         }
4643 #endif
4644