Advanced mmap: madvise(), mincore(), and Page-Fault Aware Performance
This article covers advanced mmap usage: madvise(), mincore(), and page-fault-aware performance. Go beyond basic mmap: learn how to hint access patterns with madvise, check residency with mincore, and design page-fault-friendly readers in C, Zig, and Rust.
mmap is a powerful tool, but performance depends on your page fault pattern and how the kernel manages the page cache.
This post focuses on practical knobs:
- madvise() to communicate access patterns
- mincore() to observe which pages are resident
1) What madvise actually does
madvise(addr, len, advice) gives the kernel a hint.
It doesn’t guarantee behavior, but it can influence:
- readahead strategy
- eviction tendencies
- THP/huge-page behavior (with other flags)
Common advice values:
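- MADV_SEQUENTIAL: you will touch pages in order
- MADV_RANDOM: you will touch pages randomly
- MADV_WILLNEED: you will need these pages soon, so prefetch them
- MADV_DONTNEED: you no longer need these pages. On Linux this is more than a hint: the pages are dropped from the process's mapping right away. A later access re-reads file-backed pages from the file and sees zero-filled pages for private anonymous memory.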
2) Observing residency with mincore
mincore() tells you which pages of a mapping are currently resident in memory.
This is useful for:
- understanding warm vs cold cache
- detecting whether your prefetching is effective
3) C: map a file and measure residency
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
/* Count the entries of a mincore() vector whose residency bit (bit 0) is set. */
static size_t count_resident(const unsigned char *vec, size_t pages) {
    size_t resident = 0;
    for (size_t i = 0; i < pages; i++) {
        resident += (size_t)(vec[i] & 1);
    }
    return resident;
}

/*
 * Map the file named by argv[1] read-only, hint a sequential scan, and print
 * how many of its pages mincore() reports as resident before and after
 * touching one byte in every page.
 *
 * Returns 0 on success (including an empty file), 2 on a usage error, and
 * 1 if a system call or the allocation fails. Every exit after open() goes
 * through the cleanup label so the buffer, mapping and descriptor are
 * always released.
 */
int main(int argc, char **argv) {
    int rc = 1;
    int fd = -1;
    void *p = MAP_FAILED;
    unsigned char *vec = NULL;
    size_t len = 0;
    size_t pages = 0;
    long page = 0;
    struct stat st;

    if (argc != 2) {
        fprintf(stderr, "usage: %s <file>\n", argv[0]);
        return 2;
    }

    fd = open(argv[1], O_RDONLY);
    if (fd < 0) {
        fprintf(stderr, "open: %s\n", strerror(errno));
        goto cleanup;
    }

    if (fstat(fd, &st) != 0) {
        fprintf(stderr, "fstat: %s\n", strerror(errno));
        goto cleanup;
    }

    /* mmap() rejects a zero length, so an empty file is a trivial success. */
    if (st.st_size <= 0) {
        rc = 0;
        goto cleanup;
    }
    len = (size_t)st.st_size;

    page = sysconf(_SC_PAGESIZE);
    if (page <= 0) {
        fprintf(stderr, "sysconf: %s\n", strerror(errno));
        goto cleanup;
    }

    p = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
    if (p == MAP_FAILED) {
        fprintf(stderr, "mmap: %s\n", strerror(errno));
        goto cleanup;
    }

    /* Hint sequential scan. The hint is best-effort: report a failure but keep going. */
    if (madvise(p, len, MADV_SEQUENTIAL) != 0) {
        fprintf(stderr, "madvise: %s\n", strerror(errno));
    }

    /* Round up without computing len + page - 1, which could wrap. */
    pages = len / (size_t)page + (len % (size_t)page != 0 ? 1 : 0);

    /* mincore() writes one status byte per page. */
    vec = malloc(pages);
    if (!vec) {
        fprintf(stderr, "malloc: %s\n", strerror(errno));
        goto cleanup;
    }

    if (mincore(p, len, vec) != 0) {
        fprintf(stderr, "mincore: %s\n", strerror(errno));
        goto cleanup;
    }
    printf("resident pages: %zu/%zu\n", count_resident(vec, pages), pages);

    /* Touch a byte from each page (sequential faulting). */
    {
        /* volatile keeps the compiler from optimizing the reads away. */
        volatile unsigned char sink = 0;
        const unsigned char *b = (const unsigned char *)p;
        for (size_t i = 0; i < pages; i++) {
            sink ^= b[i * (size_t)page];
        }
        (void)sink;
    }

    /* Re-check residency. */
    if (mincore(p, len, vec) != 0) {
        fprintf(stderr, "mincore: %s\n", strerror(errno));
        goto cleanup;
    }
    printf("resident after scan: %zu/%zu\n", count_resident(vec, pages), pages);

    rc = 0;

cleanup:
    free(vec);
    if (p != MAP_FAILED) {
        munmap(p, len);
    }
    if (fd >= 0) {
        close(fd);
    }
    return rc;
}
4) Zig: madvise and mincore
const std = @import("std");
const os = std.os;
// Maps the file named by the single command-line argument read-only, hints a
// sequential scan, and prints how many of its pages mincore reports as resident.
// Returns error.InvalidArgs unless exactly one argument is given; an empty file
// returns without printing anything.
// NOTE(review): this relies on the `std.os` wrappers `sysconf`, `madvise` and
// `mincore` with these exact signatures; they may not exist in every Zig std
// release. Confirm against the Zig version this example targets.
pub fn main() !void {
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
defer _ = gpa.deinit();
const a = gpa.allocator();
var args = try std.process.argsAlloc(a);
defer std.process.argsFree(a, args);
// args[0] is the program name, so exactly one user argument is expected.
if (args.len != 2) return error.InvalidArgs;
var f = try std.fs.cwd().openFile(args[1], .{});
defer f.close();
const st = try f.stat();
// Nothing to map for an empty file.
if (st.size == 0) return;
const len: usize = @intCast(st.size);
const p = try os.mmap(null, len, os.PROT.READ, os.MAP.PRIVATE, f.handle, 0);
defer os.munmap(p);
// Best-effort hint: a madvise failure is deliberately discarded.
_ = os.madvise(p, len, os.MADV.SEQUENTIAL) catch {};
const page = try os.sysconf(os._SC.PAGESIZE);
// Number of pages covering `len` bytes, rounded up.
const pages = (len + @as(usize, @intCast(page)) - 1) / @as(usize, @intCast(page));
// One status byte per page for mincore to fill in.
var vec = try a.alloc(u8, pages);
defer a.free(vec);
try os.mincore(p, len, vec);
var resident: usize = 0;
// Only bit 0 of each entry is counted as the residency flag.
for (vec) |v| resident += (v & 1);
std.debug.print("resident pages: {}/{}\n", .{resident, pages});
}
5) Rust: observe residency (libc)
use std::fs::File;
use std::io;
use std::os::unix::io::AsRawFd;
/// Maps the file named by the first command-line argument read-only and prints
/// how many of its pages `mincore` reports as resident.
///
/// An empty file is a trivial success and prints nothing.
///
/// # Errors
///
/// Returns an `InvalidInput` error when the path argument is missing or the
/// file is too large to address on this platform, and the underlying OS error
/// when opening the file or any of `sysconf`, `mmap`, `mincore` or `munmap`
/// fails.
fn main() -> io::Result<()> {
    // A missing argument is a user error, not a bug: report it instead of panicking.
    let path = std::env::args()
        .nth(1)
        .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidInput, "usage: <program> <file>"))?;
    let f = File::open(path)?;

    // `u64 as usize` would silently truncate on 32-bit targets.
    let len = <usize as std::convert::TryFrom<u64>>::try_from(f.metadata()?.len())
        .map_err(|_| io::Error::new(io::ErrorKind::InvalidInput, "file too large to map"))?;
    if len == 0 {
        // mmap rejects a zero length.
        return Ok(());
    }

    // SAFETY: sysconf only reads its integer argument.
    let page = unsafe { libc::sysconf(libc::_SC_PAGESIZE) };
    if page <= 0 {
        return Err(io::Error::last_os_error());
    }
    // Checked positive above, so the cast cannot wrap.
    let page = page as usize;
    // Round up without computing `len + page - 1`, which could overflow.
    let pages = len / page + usize::from(len % page != 0);
    // mincore writes one status byte per page.
    let mut vec = vec![0u8; pages];

    // SAFETY: a null hint lets the kernel choose the address, `len` is non-zero,
    // and `f` stays open for the duration of the call.
    let p = unsafe {
        libc::mmap(
            std::ptr::null_mut(),
            len,
            libc::PROT_READ,
            libc::MAP_PRIVATE,
            f.as_raw_fd(),
            0,
        )
    };
    if p == libc::MAP_FAILED {
        return Err(io::Error::last_os_error());
    }

    // SAFETY: `p..p + len` is the live mapping created above and `vec` holds
    // one byte for every page in that range.
    let rc = unsafe { libc::mincore(p, len, vec.as_mut_ptr() as *mut _) };
    // Capture errno now; the munmap call below may overwrite it.
    let mincore_err = if rc != 0 {
        Some(io::Error::last_os_error())
    } else {
        None
    };

    // Unmap on every path so a mincore failure does not leak the mapping.
    // SAFETY: `p` and `len` are exactly what mmap returned and was given, and
    // nothing references the mapping after this point.
    let unmap_rc = unsafe { libc::munmap(p, len) };
    if let Some(err) = mincore_err {
        return Err(err);
    }
    if unmap_rc != 0 {
        return Err(io::Error::last_os_error());
    }

    // Bit 0 of each entry is the residency flag.
    let resident = vec.iter().filter(|&&b| b & 1 != 0).count();
    println!("resident pages: {resident}/{pages}");
    Ok(())
}
6) Practical advice
- Use MADV_SEQUENTIAL for linear scans.
- Use MADV_RANDOM when access is random, to avoid wasted readahead.
- Consider MADV_WILLNEED when you know you’ll touch a region soon.
- Measure with perf stat and page-fault counters.
References
- madvise(2): https://man7.org/linux/man-pages/man2/madvise.2.html
- mincore(2): https://man7.org/linux/man-pages/man2/mincore.2.html
- mmap(2): https://man7.org/linux/man-pages/man2/mmap.2.html
- Brendan Gregg Linux performance: http://www.brendangregg.com/linuxperf.html