relay_server/utils/memory.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305
use crate::statsd::RelayGauges;
use arc_swap::ArcSwap;
use relay_config::Config;
use relay_statsd::metric;
use std::fmt;
use std::fmt::Formatter;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::{Arc, Mutex};
use std::time::Instant;
use sysinfo::{MemoryRefreshKind, System};
/// The representation of the current memory state.
#[derive(Clone, Copy, Debug)]
pub struct Memory {
/// Used memory.
///
/// This measure of used memory represents the Resident Set Size (RSS) which represents the
/// amount of physical memory that a process has in the main memory that does not correspond
/// to anything on disk.
pub used: u64,
/// Total memory.
pub total: u64,
}
impl Memory {
/// Returns the percentage amount of used memory in the interval [0.0, 1.0].
///
/// The percentage measurement will return 1.0 in the following edge cases:
/// - When total is 0
/// - When used / total produces a NaN
pub fn used_percent(&self) -> f32 {
let used_percent = self.used as f32 / self.total as f32;
if used_percent.is_nan() {
return 1.0;
};
used_percent.clamp(0.0, 1.0)
}
}
/// Inner struct that holds the latest [`Memory`] state which is polled at least every 100ms.
///
/// The goal of this implementation is to offer lock-free reading to any arbitrary number of threads
/// while at the same time, reducing to the minimum the need for locking when memory stats need to
/// be updated.
///
/// Because of how the implementation is designed, there is a very small chance that multiple
/// threads are waiting on the lock that guards [`System`]. The only case in which there might be
/// multiple threads waiting on the lock, is if a thread holds the lock for more than
/// `refresh_frequency_ms` and a new thread comes and updates the `last_update` and tries
/// to acquire the lock to perform another memory reading.
struct Inner {
memory: ArcSwap<Memory>,
last_update: AtomicU64,
reference_time: Instant,
system: Mutex<System>,
refresh_frequency_ms: u64,
}
/// Wrapper which hides the [`Arc`] and exposes utils method to make working with
/// [`MemoryStat`] as opaque as possible.
#[derive(Clone)]
pub struct MemoryStat(Arc<Inner>);
impl MemoryStat {
/// Creates an instance of [`MemoryStat`] and obtains the current memory readings from
/// [`System`].
pub fn new(refresh_frequency_ms: u64) -> Self {
// sysinfo docs suggest to use a single instance of `System` across the program.
let mut system = System::new();
Self(Arc::new(Inner {
memory: ArcSwap::from(Arc::new(Self::refresh_memory(&mut system))),
last_update: AtomicU64::new(0),
reference_time: Instant::now(),
system: Mutex::new(system),
refresh_frequency_ms,
}))
}
/// Returns a copy of the most up-to-date memory data.
pub fn memory(&self) -> Memory {
self.try_update();
**self.0.memory.load()
}
/// Refreshes the memory readings.
fn refresh_memory(system: &mut System) -> Memory {
system.refresh_memory_specifics(MemoryRefreshKind::new().with_ram());
let memory = match system.cgroup_limits() {
Some(cgroup) => Memory {
used: cgroup.rss,
total: cgroup.total_memory,
},
None => Memory {
used: system.used_memory(),
total: system.total_memory(),
},
};
metric!(gauge(RelayGauges::SystemMemoryUsed) = memory.used);
metric!(gauge(RelayGauges::SystemMemoryTotal) = memory.total);
memory
}
/// Updates the memory readings if at least `refresh_frequency_ms` has passed.
fn try_update(&self) {
let last_update = self.0.last_update.load(Ordering::Relaxed);
let elapsed_time = self.0.reference_time.elapsed().as_millis() as u64;
if elapsed_time - last_update < self.0.refresh_frequency_ms {
return;
}
if self
.0
.last_update
.compare_exchange_weak(
last_update,
elapsed_time,
Ordering::Relaxed,
Ordering::Relaxed,
)
.is_err()
{
return;
}
let mut system = self
.0
.system
.lock()
.unwrap_or_else(|system| system.into_inner());
let updated_memory = Self::refresh_memory(&mut system);
self.0.memory.store(Arc::new(updated_memory));
}
}
impl fmt::Debug for MemoryStat {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
write!(f, "MemoryStat")
}
}
impl Default for MemoryStat {
fn default() -> Self {
Self::new(100)
}
}
/// Enum representing the two different states of a memory check.
pub enum MemoryCheck {
/// The memory usage is below the specified thresholds.
Ok(Memory),
/// The memory usage exceeds the specified thresholds.
Exceeded(Memory),
}
impl MemoryCheck {
/// Returns `true` if [`MemoryCheck`] is of variant [`MemoryCheck::Ok`].
pub fn has_capacity(&self) -> bool {
matches!(self, Self::Ok(_))
}
/// Returns `true` if [`MemoryCheck`] is of variant [`MemoryCheck::Exceeded`].
pub fn is_exceeded(&self) -> bool {
!self.has_capacity()
}
}
/// Struct that composes a [`Config`] and [`MemoryStat`] and provides utility methods to validate
/// whether memory is within limits.
///
/// The rationale behind such struct, is to be able to share across Relay the same logic for dealing
/// with memory readings. It's decoupled from [`MemoryStat`] because it's just a layer on top that
/// decides how memory readings are interpreted.
#[derive(Clone, Debug)]
pub struct MemoryChecker {
memory_stat: MemoryStat,
config: Arc<Config>,
}
impl MemoryChecker {
/// Create an instance of [`MemoryChecker`].
pub fn new(memory_stat: MemoryStat, config: Arc<Config>) -> Self {
Self {
memory_stat,
config: config.clone(),
}
}
/// Checks if the used percentage of memory is below the specified threshold.
pub fn check_memory_percent(&self) -> MemoryCheck {
let memory = self.memory_stat.memory();
if memory.used_percent() < self.config.health_max_memory_watermark_percent() {
return MemoryCheck::Ok(memory);
}
MemoryCheck::Exceeded(memory)
}
/// Checks if the used memory (in bytes) is below the specified threshold.
pub fn check_memory_bytes(&self) -> MemoryCheck {
let memory = self.memory_stat.memory();
if memory.used < self.config.health_max_memory_watermark_bytes() {
return MemoryCheck::Ok(memory);
}
MemoryCheck::Exceeded(memory)
}
/// Checks if the used memory is below both percentage and bytes thresholds.
///
/// This is the function that should be mainly used for checking whether of not Relay has
/// enough memory.
pub fn check_memory(&self) -> MemoryCheck {
let memory = self.memory_stat.memory();
if memory.used_percent() < self.config.health_max_memory_watermark_percent()
&& memory.used < self.config.health_max_memory_watermark_bytes()
{
return MemoryCheck::Ok(memory);
}
MemoryCheck::Exceeded(memory)
}
}
#[cfg(test)]
mod tests {
use relay_config::Config;
use std::sync::atomic::Ordering;
use std::sync::Arc;
use std::thread::sleep;
use std::time::Duration;
use crate::utils::{Memory, MemoryChecker, MemoryStat};
#[test]
fn test_memory_used_percent_both_0() {
let memory = Memory { used: 0, total: 0 };
assert_eq!(memory.used_percent(), 1.0);
}
#[test]
fn test_memory_used_percent_total_0() {
let memory = Memory {
used: 100,
total: 0,
};
assert_eq!(memory.used_percent(), 1.0);
}
#[test]
fn test_memory_used_percent_zero() {
let memory = Memory {
used: 0,
total: 100,
};
assert_eq!(memory.used_percent(), 0.0);
}
#[test]
fn test_memory_used_percent_half() {
let memory = Memory {
used: 50,
total: 100,
};
assert_eq!(memory.used_percent(), 0.5);
}
#[test]
fn test_memory_checker() {
let config = Config::from_json_value(serde_json::json!({
"health": {
"max_memory_percent": 1.0
}
}))
.unwrap();
let memory_checker = MemoryChecker::new(MemoryStat::default(), Arc::new(config));
assert!(memory_checker.check_memory().has_capacity());
let config = Config::from_json_value(serde_json::json!({
"health": {
"max_memory_percent": 0.0
}
}))
.unwrap();
let memory_checker = MemoryChecker::new(MemoryStat::default(), Arc::new(config));
assert!(memory_checker.check_memory().is_exceeded());
}
#[test]
fn test_last_update_is_updated() {
let memory = MemoryStat::new(0);
let first_update = memory.0.last_update.load(Ordering::Relaxed);
sleep(Duration::from_millis(1));
memory.memory();
let second_update = memory.0.last_update.load(Ordering::Relaxed);
assert!(first_update <= second_update);
}
}