What is the ‘page cache’, how is it managed, and how does ‘drop_caches’ drop these pages?

– The “buffers/cache” value reported by free includes the page cache, but not the dentry cache, which is kept in the slab cache ‘dentry_cache’.

– The page cache grows and shrinks with disk access activity, and it is managed per super block (that is, per mounted filesystem).

– ‘echo 1 > /proc/sys/vm/drop_caches’ frees the page cache by calling the function below.

int drop_caches_sysctl_handler(ctl_table *table, int write,
     void __user *buffer, size_t *length, loff_t *ppos)
 {
     int ret;
 
     ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
     if (ret)
         return ret;
     if (write) {
         static int stfu;
 
         if (sysctl_drop_caches & 1) {
             iterate_supers(drop_pagecache_sb, NULL);
             count_vm_event(DROP_PAGECACHE);
         }
         if (sysctl_drop_caches & 2) {
             drop_slab();
             count_vm_event(DROP_SLAB);
         }
         if (!stfu) {
             pr_info("%s (%d): drop_caches: %d\n",
                 current->comm, task_pid_nr(current),
                 sysctl_drop_caches);
         }
         stfu |= sysctl_drop_caches & 4;
     }
     return 0;
}


 void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
 {
     struct super_block *sb, *p = NULL;
 
     spin_lock(&sb_lock);
     list_for_each_entry(sb, &super_blocks, s_list) {
         if (hlist_unhashed(&sb->s_instances))
             continue;
         sb->s_count++;
         spin_unlock(&sb_lock);
 
         down_read(&sb->s_umount);
         if (sb->s_root && (sb->s_flags & MS_BORN))
             f(sb, arg);
         up_read(&sb->s_umount);
 
         spin_lock(&sb_lock);
         if (p)
             __put_super(p);
         p = sb;
     }
     if (p)
         __put_super(p);
     spin_unlock(&sb_lock);
 }
 
 
  static void drop_pagecache_sb(struct super_block *sb, void *unused)
 {
     struct inode *inode, *toput_inode = NULL;
 
     spin_lock(&inode_sb_list_lock);
     list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
         spin_lock(&inode->i_lock);
         if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
             (inode->i_mapping->nrpages == 0)) {
             spin_unlock(&inode->i_lock);
             continue;
         }
         __iget(inode);
         spin_unlock(&inode->i_lock);
         spin_unlock(&inode_sb_list_lock);
         invalidate_mapping_pages(inode->i_mapping, 0, -1);
         iput(toput_inode);
         toput_inode = inode;
         spin_lock(&inode_sb_list_lock);
     }
     spin_unlock(&inode_sb_list_lock);
     iput(toput_inode);
 }

You can see above that it iterates over each super block's sb->s_inodes list. The inodes on sb->s_inodes own the page-cache pages, and those pages are released by calling invalidate_mapping_pages() on each inode's i_mapping.

struct super_block {
...
     struct list_head    s_inodes;   /* all inodes */
...
}

struct inode {
...
     struct address_space    *i_mapping;
...
     struct list_head    i_sb_list;
...
}


 struct address_space {
     struct inode        *host;      /* owner: inode, block_device */
     struct radix_tree_root  page_tree;  /* radix tree of all pages */
     spinlock_t      tree_lock;  /* and lock protecting it */
     RH_KABI_REPLACE(unsigned int i_mmap_writable,
              atomic_t i_mmap_writable) /* count VM_SHARED mappings */
     struct rb_root      i_mmap;     /* tree of private and shared mappings */
     struct list_head    i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
     struct mutex        i_mmap_mutex;   /* protect tree, count, list */
     /* Protected by tree_lock together with the radix tree */
     unsigned long       nrpages;    /* number of total pages */
     /* number of shadow or DAX exceptional entries */
     RH_KABI_RENAME(unsigned long nrshadows,
                unsigned long nrexceptional);
     pgoff_t         writeback_index;/* writeback starts here */
     const struct address_space_operations *a_ops;   /* methods */
     unsigned long       flags;      /* error bits/gfp mask */
     struct backing_dev_info *backing_dev_info; /* device readahead, etc */
     spinlock_t      private_lock;   /* for use by the address_space */
     struct list_head    private_list;   /* ditto */
     void            *private_data;  /* ditto */
 } __attribute__((aligned(sizeof(long))));

invalidate_mapping_pages() is called with inode->i_mapping, which describes the cached pages of a file or block device — but it does not record which process was using those pages. That is why we cannot tell which process was consuming the page cache.

Leave a Comment

Please log in using one of these methods to post your comment:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Google photo

You are commenting using your Google account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s

This site uses Akismet to reduce spam. Learn how your comment data is processed.