RHEL4/mm/swap_state.c
<<
>>
Prefs
/*
 *  linux/mm/swap_state.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *  Swap reorganised 29.12.95, Stephen Tweedie
 *
 *  Rewritten to use page cache, (C) 1998 Stephen Tweedie
 */
   9#include <linux/module.h>
  10#include <linux/mm.h>
  11#include <linux/kernel_stat.h>
  12#include <linux/swap.h>
  13#include <linux/init.h>
  14#include <linux/pagemap.h>
  15#include <linux/buffer_head.h>
  16#include <linux/backing-dev.h>
  17
  18#include <asm/pgtable.h>
  19
  20/*
  21 * swapper_space is a fiction, retained to simplify the path through
  22 * vmscan's shrink_list, to make sync_page look nicer, and to allow
  23 * future use of radix_tree tags in the swap cache.
  24 */
  25static struct address_space_operations swap_aops = {
  26        .writepage      = swap_writepage,
  27        .sync_page      = block_sync_page,
  28        .set_page_dirty = __set_page_dirty_nobuffers,
  29};
  30
  31static struct backing_dev_info swap_backing_dev_info = {
  32        .memory_backed  = 1,    /* Does not contribute to dirty memory */
  33        .unplug_io_fn   = swap_unplug_io_fn,
  34};
  35
  36struct address_space swapper_space = {
  37        .page_tree      = RADIX_TREE_INIT(GFP_ATOMIC),
  38        .tree_lock      = SPIN_LOCK_UNLOCKED,
  39        .a_ops          = &swap_aops,
  40        .i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear),
  41        .backing_dev_info = &swap_backing_dev_info,
  42};
  43EXPORT_SYMBOL(swapper_space);
  44
  45#define INC_CACHE_INFO(x)       do { swap_cache_info.x++; } while (0)
  46
  47static struct {
  48        unsigned long add_total;
  49        unsigned long del_total;
  50        unsigned long find_success;
  51        unsigned long find_total;
  52        unsigned long noent_race;
  53        unsigned long exist_race;
  54} swap_cache_info;
  55
  56void show_swap_cache_info(void)
  57{
  58        printk("Swap cache: add %lu, delete %lu, find %lu/%lu, race %lu+%lu\n",
  59                swap_cache_info.add_total, swap_cache_info.del_total,
  60                swap_cache_info.find_success, swap_cache_info.find_total,
  61                swap_cache_info.noent_race, swap_cache_info.exist_race);
  62}
  63
  64/*
  65 * __add_to_swap_cache resembles add_to_page_cache on swapper_space,
  66 * but sets SwapCache flag and private instead of mapping and index.
  67 */
  68static int __add_to_swap_cache(struct page *page,
  69                swp_entry_t entry, int gfp_mask)
  70{
  71        int error;
  72
  73        BUG_ON(PageSwapCache(page));
  74        BUG_ON(PagePrivate(page));
  75        error = radix_tree_preload(gfp_mask);
  76        if (!error) {
  77                spin_lock_irq(&swapper_space.tree_lock);
  78                error = radix_tree_insert(&swapper_space.page_tree,
  79                                                entry.val, page);
  80                if (!error) {
  81                        page_cache_get(page);
  82                        SetPageLocked(page);
  83                        SetPageSwapCache(page);
  84                        page->private = entry.val;
  85                        total_swapcache_pages++;
  86                        pagecache_acct(1);
  87                }
  88                spin_unlock_irq(&swapper_space.tree_lock);
  89                radix_tree_preload_end();
  90        }
  91        return error;
  92}
  93
  94static int add_to_swap_cache(struct page *page, swp_entry_t entry)
  95{
  96        int error;
  97
  98        if (!swap_duplicate(entry)) {
  99                INC_CACHE_INFO(noent_race);
 100                return -ENOENT;
 101        }
 102        error = __add_to_swap_cache(page, entry, GFP_KERNEL);
 103        /*
 104         * Anon pages are already on the LRU, we don't run lru_cache_add here.
 105         */
 106        if (error) {
 107                swap_free(entry);
 108                if (error == -EEXIST)
 109                        INC_CACHE_INFO(exist_race);
 110                return error;
 111        }
 112        INC_CACHE_INFO(add_total);
 113        return 0;
 114}
 115
 116/*
 117 * This must be called only on pages that have
 118 * been verified to be in the swap cache.
 119 */
 120void __delete_from_swap_cache(struct page *page)
 121{
 122        BUG_ON(!PageLocked(page));
 123        BUG_ON(!PageSwapCache(page));
 124        BUG_ON(PageWriteback(page));
 125
 126        radix_tree_delete(&swapper_space.page_tree, page->private);
 127        page->private = 0;
 128        ClearPageSwapCache(page);
 129        total_swapcache_pages--;
 130        pagecache_acct(-1);
 131        INC_CACHE_INFO(del_total);
 132}
 133
 134/**
 135 * add_to_swap - allocate swap space for a page
 136 * @page: page we want to move to swap
 137 *
 138 * Allocate swap space for the page and add the page to the
 139 * swap cache.  Caller needs to hold the page lock. 
 140 */
 141int add_to_swap(struct page * page)
 142{
 143        swp_entry_t entry;
 144        int pf_flags;
 145        int err;
 146
 147        if (!PageLocked(page))
 148                BUG();
 149
 150        for (;;) {
 151                entry = get_swap_page();
 152                if (!entry.val)
 153                        return 0;
 154
 155                /* Radix-tree node allocations are performing
 156                 * GFP_ATOMIC allocations under PF_MEMALLOC.  
 157                 * They can completely exhaust the page allocator.  
 158                 *
 159                 * So PF_MEMALLOC is dropped here.  This causes the slab 
 160                 * allocations to fail earlier, so radix-tree nodes will 
 161                 * then be allocated from the mempool reserves.
 162                 *
 163                 * We're still using __GFP_HIGH for radix-tree node
 164                 * allocations, so some of the emergency pools are available,
 165                 * just not all of them.
 166                 */
 167
 168                pf_flags = current->flags;
 169                current->flags &= ~PF_MEMALLOC;
 170
 171                /*
 172                 * Add it to the swap cache and mark it dirty
 173                 */
 174                err = __add_to_swap_cache(page, entry, GFP_ATOMIC|__GFP_NOWARN);
 175
 176                if (pf_flags & PF_MEMALLOC)
 177                        current->flags |= PF_MEMALLOC;
 178
 179                switch (err) {
 180                case 0:                         /* Success */
 181                        SetPageUptodate(page);
 182                        SetPageDirty(page);
 183                        INC_CACHE_INFO(add_total);
 184                        return 1;
 185                case -EEXIST:
 186                        /* Raced with "speculative" read_swap_cache_async */
 187                        INC_CACHE_INFO(exist_race);
 188                        swap_free(entry);
 189                        continue;
 190                default:
 191                        /* -ENOMEM radix-tree allocation failure */
 192                        swap_free(entry);
 193                        return 0;
 194                }
 195        }
 196}
 197
 198/*
 199 * This must be called only on pages that have
 200 * been verified to be in the swap cache and locked.
 201 * It will never put the page into the free list,
 202 * the caller has a reference on the page.
 203 */
 204void delete_from_swap_cache(struct page *page)
 205{
 206        swp_entry_t entry;
 207
 208        BUG_ON(!PageSwapCache(page));
 209        BUG_ON(!PageLocked(page));
 210        BUG_ON(PageWriteback(page));
 211        BUG_ON(PagePrivate(page));
 212  
 213        entry.val = page->private;
 214
 215        spin_lock_irq(&swapper_space.tree_lock);
 216        __delete_from_swap_cache(page);
 217        spin_unlock_irq(&swapper_space.tree_lock);
 218
 219        swap_free(entry);
 220        page_cache_release(page);
 221}
 222
 223/*
 224 * Strange swizzling function only for use by shmem_writepage
 225 */
 226int move_to_swap_cache(struct page *page, swp_entry_t entry)
 227{
 228        int err = __add_to_swap_cache(page, entry, GFP_ATOMIC);
 229        if (!err) {
 230                remove_from_page_cache(page);
 231                page_cache_release(page);       /* pagecache ref */
 232                if (!swap_duplicate(entry))
 233                        BUG();
 234                SetPageDirty(page);
 235                INC_CACHE_INFO(add_total);
 236        } else if (err == -EEXIST)
 237                INC_CACHE_INFO(exist_race);
 238        return err;
 239}
 240
 241/*
 242 * Strange swizzling function for shmem_getpage (and shmem_unuse)
 243 */
 244int move_from_swap_cache(struct page *page, unsigned long index,
 245                struct address_space *mapping)
 246{
 247        int err = add_to_page_cache(page, mapping, index, GFP_ATOMIC);
 248        if (!err) {
 249                delete_from_swap_cache(page);
 250                /* shift page from clean_pages to dirty_pages list */
 251                ClearPageDirty(page);
 252                set_page_dirty(page);
 253        }
 254        return err;
 255}
 256
 257/* 
 258 * If we are the only user, then try to free up the swap cache. 
 259 * 
 260 * Its ok to check for PageSwapCache without the page lock
 261 * here because we are going to recheck again inside 
 262 * exclusive_swap_page() _with_ the lock. 
 263 *                                      - Marcelo
 264 */
 265static inline void free_swap_cache(struct page *page)
 266{
 267        if (PageSwapCache(page) && !TestSetPageLocked(page)) {
 268                remove_exclusive_swap_page(page);
 269                unlock_page(page);
 270        }
 271}
 272
 273/* 
 274 * Perform a free_page(), also freeing any swap cache associated with
 275 * this page if it is the last user of the page. Can not do a lock_page,
 276 * as we are holding the page_table_lock spinlock.
 277 */
 278void free_page_and_swap_cache(struct page *page)
 279{
 280        free_swap_cache(page);
 281        page_cache_release(page);
 282}
 283
 284/*
 285 * Passed an array of pages, drop them all from swapcache and then release
 286 * them.  They are removed from the LRU and freed if this is their last use.
 287 */
 288void free_pages_and_swap_cache(struct page **pages, int nr)
 289{
 290        int chunk = 16;
 291        struct page **pagep = pages;
 292
 293        lru_add_drain();
 294        while (nr) {
 295                int todo = min(chunk, nr);
 296                int i;
 297
 298                for (i = 0; i < todo; i++)
 299                        free_swap_cache(pagep[i]);
 300                release_pages(pagep, todo, 0);
 301                pagep += todo;
 302                nr -= todo;
 303        }
 304}
 305
 306/*
 307 * Lookup a swap entry in the swap cache. A found page will be returned
 308 * unlocked and with its refcount incremented - we rely on the kernel
 309 * lock getting page table operations atomic even if we drop the page
 310 * lock before returning.
 311 */
 312struct page * lookup_swap_cache(swp_entry_t entry)
 313{
 314        struct page *page;
 315
 316        spin_lock_irq(&swapper_space.tree_lock);
 317        page = radix_tree_lookup(&swapper_space.page_tree, entry.val);
 318        if (page) {
 319                page_cache_get(page);
 320                INC_CACHE_INFO(find_success);
 321        }
 322        spin_unlock_irq(&swapper_space.tree_lock);
 323        INC_CACHE_INFO(find_total);
 324        return page;
 325}
 326
 327/* 
 328 * Locate a page of swap in physical memory, reserving swap cache space
 329 * and reading the disk if it is not already cached.
 330 * A failure return means that either the page allocation failed or that
 331 * the swap entry is no longer in use.
 332 */
 333struct page *read_swap_cache_async(swp_entry_t entry,
 334                        struct vm_area_struct *vma, unsigned long addr)
 335{
 336        struct page *found_page, *new_page = NULL;
 337        int err;
 338
 339        do {
 340                /*
 341                 * First check the swap cache.  Since this is normally
 342                 * called after lookup_swap_cache() failed, re-calling
 343                 * that would confuse statistics.
 344                 */
 345                spin_lock_irq(&swapper_space.tree_lock);
 346                found_page = radix_tree_lookup(&swapper_space.page_tree,
 347                                                entry.val);
 348                if (found_page)
 349                        page_cache_get(found_page);
 350                spin_unlock_irq(&swapper_space.tree_lock);
 351                if (found_page)
 352                        break;
 353
 354                /*
 355                 * Get a new page to read into from swap.
 356                 */
 357                if (!new_page) {
 358                        new_page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
 359                        if (!new_page)
 360                                break;          /* Out of memory */
 361                }
 362
 363                /*
 364                 * Associate the page with swap entry in the swap cache.
 365                 * May fail (-ENOENT) if swap entry has been freed since
 366                 * our caller observed it.  May fail (-EEXIST) if there
 367                 * is already a page associated with this entry in the
 368                 * swap cache: added by a racing read_swap_cache_async,
 369                 * or by try_to_swap_out (or shmem_writepage) re-using
 370                 * the just freed swap entry for an existing page.
 371                 * May fail (-ENOMEM) if radix-tree node allocation failed.
 372                 */
 373                err = add_to_swap_cache(new_page, entry);
 374                if (!err) {
 375                        /*
 376                         * Initiate read into locked page and return.
 377                         */
 378                        lru_cache_add_active(new_page);
 379                        swap_readpage(NULL, new_page);
 380                        return new_page;
 381                }
 382        } while (err != -ENOENT && err != -ENOMEM);
 383
 384        if (new_page)
 385                page_cache_release(new_page);
 386        return found_page;
 387}
 388