Advanced mmap: madvise(), mincore(), and Page-Fault Aware Performance

This article goes beyond basic mmap: learn how to hint access patterns with madvise(), check page residency with mincore(), and design page-fault-friendly readers in C, Zig, and Rust.

mmap is a powerful tool, but performance depends on your page fault pattern and how the kernel manages the page cache.

This post focuses on practical knobs:

  • madvise() to communicate access patterns
  • mincore() to observe which pages are resident

1) What madvise actually does

madvise(addr, len, advice) gives the kernel a hint.

It doesn’t guarantee behavior, but it can influence:

  • readahead strategy
  • eviction tendencies
  • THP/huge-page behavior (via MADV_HUGEPAGE / MADV_NOHUGEPAGE on Linux)

Common advice values:

  • MADV_SEQUENTIAL : you will touch pages in order
  • MADV_RANDOM : you will touch pages randomly
  • MADV_WILLNEED : prefetch
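  • MADV_DONTNEED : you no longer need the range; on Linux the pages are dropped from your mapping right away and re-faulted on the next access (file-backed pages may still remain in the page cache)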

2) Observing residency with mincore

mincore() tells you which pages of a mapping are currently resident in memory.

This is useful for:

  • understanding warm vs cold cache
  • detecting whether your prefetching is effective

3) C: map a file and measure residency

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

// Count the entries in vec[0..pages) whose least-significant bit is set.
// mincore() sets that bit for each page it reports as resident.
static size_t count_resident(const unsigned char *vec, size_t pages) {
    size_t resident = 0;
    for (size_t i = 0; i < pages; i++) resident += vec[i] & 1u;
    return resident;
}

// Query residency of the mapping [p, p + len) into vec (one byte per page)
// and print "<label>: <resident>/<pages>" to stdout.
// Returns 0 on success, -1 if mincore() failed (the error is printed to stderr).
static int report_residency(void *p, size_t len, unsigned char *vec,
                            size_t pages, const char *label) {
    if (mincore(p, len, vec) != 0) {
        fprintf(stderr, "mincore: %s\n", strerror(errno));
        return -1;
    }
    printf("%s: %zu/%zu\n", label, count_resident(vec, pages), pages);
    return 0;
}

// Map the file named by argv[1] read-only, print how many of its pages are
// resident, touch one byte per page, then print the residency again.
//
// Exit status: 0 on success (including an empty file), 1 on any system-call
// or allocation failure, 2 on wrong usage. All acquired resources (fd,
// mapping, residency vector) are released on every path after open().
int main(int argc, char **argv) {
    if (argc != 2) {
        fprintf(stderr, "usage: %s <file>\n", argv[0]);
        return 2;
    }

    // Everything the cleanup label needs is declared up front so that no
    // goto jumps over an initialisation.
    int rc = 1;
    void *p = MAP_FAILED;
    size_t len = 0;
    size_t pages = 0;
    long page = 0;
    unsigned char *vec = NULL;
    struct stat st;

    int fd = open(argv[1], O_RDONLY);
    if (fd < 0) {
        fprintf(stderr, "open: %s\n", strerror(errno));
        return 1;
    }

    if (fstat(fd, &st) != 0) {
        fprintf(stderr, "fstat: %s\n", strerror(errno));
        goto out;
    }

    len = (size_t)st.st_size;
    if (len == 0) {
        // Nothing to map; mmap() with length 0 would fail with EINVAL.
        rc = 0;
        goto out;
    }

    p = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
    if (p == MAP_FAILED) {
        fprintf(stderr, "mmap: %s\n", strerror(errno));
        goto out;
    }

    // Hint sequential scan. The hint is best-effort, so a failure is
    // reported but does not abort the run.
    if (madvise(p, len, MADV_SEQUENTIAL) != 0) {
        fprintf(stderr, "madvise (ignored): %s\n", strerror(errno));
    }

    page = sysconf(_SC_PAGESIZE);
    if (page <= 0) {
        // sysconf() returns -1 on error; dividing by that would be garbage.
        fprintf(stderr, "sysconf: %s\n", strerror(errno));
        goto out;
    }

    // One vector byte per page, rounding the last partial page up.
    pages = (len + (size_t)page - 1) / (size_t)page;
    vec = (unsigned char*)malloc(pages);
    if (!vec) {
        fprintf(stderr, "malloc: %s\n", strerror(errno));
        goto out;
    }

    if (report_residency(p, len, vec, pages, "resident pages") != 0) goto out;

    {
        // Touch a byte from each page (sequential faulting). The volatile
        // sink keeps the compiler from optimising the reads away.
        volatile unsigned char sink = 0;
        const unsigned char *b = (const unsigned char*)p;
        for (size_t i = 0; i < pages; i++) sink ^= b[i * (size_t)page];
        (void)sink;
    }

    // Re-check residency.
    if (report_residency(p, len, vec, pages, "resident after scan") != 0) goto out;

    rc = 0;

out:
    free(vec);
    if (p != MAP_FAILED) munmap(p, len);
    close(fd);
    return rc;
}

4) Zig: madvise and mincore

const std = @import("std");
const os = std.os;

/// Maps the file named by the single command-line argument read-only,
/// hints sequential access, and prints how many of its pages are reported
/// resident. Returns `error.InvalidArgs` unless exactly one argument
/// (besides the program name) is given; any `try` failure is propagated.
pub fn main() !void {
    // NOTE(review): the `os.*` wrappers and constants used below (mmap,
    // madvise, sysconf, mincore, PROT/MAP/MADV/_SC) differ between Zig std
    // releases — confirm names and signatures against the toolchain in use.
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const a = gpa.allocator();

    // args[0] is the program name, so a single path argument means len == 2.
    var args = try std.process.argsAlloc(a);
    defer std.process.argsFree(a, args);
    if (args.len != 2) return error.InvalidArgs;

    var f = try std.fs.cwd().openFile(args[1], .{});
    defer f.close();

    // An empty file is skipped: there is nothing to map or report.
    const st = try f.stat();
    if (st.size == 0) return;

    // Map the whole file read-only and private; unmapped on scope exit.
    const len: usize = @intCast(st.size);
    const p = try os.mmap(null, len, os.PROT.READ, os.MAP.PRIVATE, f.handle, 0);
    defer os.munmap(p);

    // Sequential-access hint; a failure here is deliberately ignored.
    _ = os.madvise(p, len, os.MADV.SEQUENTIAL) catch {};

    // Number of pages covering `len`, rounding a partial last page up.
    const page = try os.sysconf(os._SC.PAGESIZE);
    const pages = (len + @as(usize, @intCast(page)) - 1) / @as(usize, @intCast(page));

    // One byte per page for mincore to fill in.
    var vec = try a.alloc(u8, pages);
    defer a.free(vec);

    try os.mincore(p, len, vec);

    // Only the low bit of each entry is counted as "resident".
    var resident: usize = 0;
    for (vec) |v| resident += (v & 1);
    std.debug.print("resident pages: {}/{}\n", .{resident, pages});
}

5) Rust: observe residency (libc)

use std::fs::File;
use std::io;
use std::os::unix::io::AsRawFd;

/// Maps the file named by the first command-line argument read-only and
/// prints how many of its pages `mincore` reports as resident.
///
/// # Errors
///
/// Returns an `io::Error` when the path argument is missing, the file cannot
/// be opened or stat'ed, its length does not fit in `usize`, the page size
/// cannot be determined, or `mmap`/`mincore` fails. The mapping is always
/// released before an error from `mincore` is returned.
fn main() -> io::Result<()> {
    // `args_os` accepts non-UTF-8 paths, which `args` would panic on.
    let path = match std::env::args_os().nth(1) {
        Some(path) => path,
        None => {
            return Err(io::Error::new(
                io::ErrorKind::InvalidInput,
                "usage: <program> <file>",
            ));
        }
    };

    let f = File::open(path)?;
    let file_len = f.metadata()?.len();
    if file_len == 0 {
        // Nothing to map; mmap with length 0 would fail with EINVAL.
        return Ok(());
    }
    // A plain `as usize` would silently truncate on 32-bit targets.
    let len = <usize as std::convert::TryFrom<u64>>::try_from(file_len)
        .map_err(|_| io::Error::new(io::ErrorKind::InvalidInput, "file too large to map"))?;

    // SAFETY: `sysconf` takes no pointers and has no memory-safety preconditions.
    let raw_page = unsafe { libc::sysconf(libc::_SC_PAGESIZE) };
    if raw_page <= 0 {
        // sysconf reports failure as -1; casting that to usize would be garbage.
        return Err(io::Error::last_os_error());
    }
    let page = raw_page as usize;

    // One vector byte per page, rounding a partial last page up without
    // risking overflow in `len + page - 1`.
    let pages = len / page + usize::from(len % page != 0);
    let mut vec = vec![0u8; pages];

    // SAFETY: a null hint lets the kernel choose the address, `len` is
    // non-zero, and `f` stays open for the duration of the call. The result
    // is checked against MAP_FAILED before it is used.
    let p = unsafe {
        libc::mmap(
            std::ptr::null_mut(),
            len,
            libc::PROT_READ,
            libc::MAP_PRIVATE,
            f.as_raw_fd(),
            0,
        )
    };
    if p == libc::MAP_FAILED {
        return Err(io::Error::last_os_error());
    }

    // SAFETY: `p..p+len` is the live mapping created above and `vec` holds
    // exactly one byte per page of that range.
    let status = unsafe { libc::mincore(p, len, vec.as_mut_ptr() as *mut _) };
    // Capture errno before munmap can overwrite it.
    let outcome = if status != 0 {
        Err(io::Error::last_os_error())
    } else {
        Ok(())
    };

    // SAFETY: `p`/`len` describe the mapping returned by mmap above and it is
    // not accessed afterwards. The result is ignored on purpose: there is
    // nothing useful to do if unmapping fails right before exit.
    let _ = unsafe { libc::munmap(p, len) };

    outcome?;

    let resident = vec.iter().filter(|b| (**b & 1) != 0).count();
    println!("resident pages: {resident}/{pages}");

    Ok(())
}

6) Practical advice

  • Use MADV_SEQUENTIAL for linear scans.
  • Use MADV_RANDOM when the access pattern is random, so the kernel does not waste I/O on readahead.
  • Consider MADV_WILLNEED when you know you’ll touch a region soon.
  • Measure with perf stat and page-fault counters.

References