RHEL4/mm/truncate.c
<<
>>
Prefs
   1/*
   2 * mm/truncate.c - code for taking down pages from address_spaces
   3 *
   4 * Copyright (C) 2002, Linus Torvalds
   5 *
   6 * 10Sep2002    akpm@zip.com.au
   7 *              Initial version.
   8 */
   9
  10#include <linux/kernel.h>
  11#include <linux/mm.h>
  12#include <linux/module.h>
  13#include <linux/pagemap.h>
  14#include <linux/pagevec.h>
  15#include <linux/task_io_accounting_ops.h>
  16#include <linux/buffer_head.h>  /* grr. try_to_release_page,
  17                                   block_invalidatepage */
  18
  19
  20static int do_invalidatepage(struct page *page, unsigned long offset)
  21{
  22        int (*invalidatepage)(struct page *, unsigned long);
  23        invalidatepage = page->mapping->a_ops->invalidatepage;
  24        if (invalidatepage == NULL)
  25                invalidatepage = block_invalidatepage;
  26        return (*invalidatepage)(page, offset);
  27}
  28
  29static inline void truncate_partial_page(struct page *page, unsigned partial)
  30{
  31        memclear_highpage_flush(page, partial, PAGE_CACHE_SIZE-partial);
  32        if (PagePrivate(page))
  33                do_invalidatepage(page, partial);
  34}
  35
  36/*
  37 * If truncate cannot remove the fs-private metadata from the page, the page
  38 * becomes anonymous.  It will be left on the LRU and may even be mapped into
  39 * user pagetables if we're racing with filemap_nopage().
  40 *
  41 * We need to bale out if page->mapping is no longer equal to the original
  42 * mapping.  This happens a) when the VM reclaimed the page while we waited on
  43 * its lock, b) when a concurrent invalidate_inode_pages got there first and
  44 * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
  45 */
  46void
  47truncate_complete_page(struct address_space *mapping, struct page *page)
  48{
  49        if (page->mapping != mapping)
  50                return;
  51
  52        if (PagePrivate(page))
  53                do_invalidatepage(page, 0);
  54
  55        if (test_clear_page_dirty(page))
  56                task_io_account_cancelled_write(PAGE_CACHE_SIZE);
  57        ClearPageUptodate(page);
  58        ClearPageMappedToDisk(page);
  59        remove_from_page_cache(page);
  60        page_cache_release(page);       /* pagecache ref */
  61}
  62EXPORT_SYMBOL_GPL(truncate_complete_page);
  63
  64/*
  65 * This is for invalidate_inode_pages().  That function can be called at
  66 * any time, and is not supposed to throw away dirty pages.  But pages can
  67 * be marked dirty at any time too.  So we re-check the dirtiness inside
  68 * ->tree_lock.  That provides exclusion against the __set_page_dirty
  69 * functions.
  70 */
  71static int
  72invalidate_complete_page(struct address_space *mapping, struct page *page)
  73{
  74        if (page->mapping != mapping)
  75                return 0;
  76
  77        if (PagePrivate(page) && !try_to_release_page(page, 0))
  78                return 0;
  79
  80        spin_lock_irq(&mapping->tree_lock);
  81        if (PageDirty(page)) {
  82                spin_unlock_irq(&mapping->tree_lock);
  83                return 0;
  84        }
  85
  86        BUG_ON(PagePrivate(page));
  87        __remove_from_page_cache(page);
  88        spin_unlock_irq(&mapping->tree_lock);
  89        ClearPageUptodate(page);
  90        page_cache_release(page);       /* pagecache ref */
  91        return 1;
  92}
  93
  94/**
  95 * truncate_inode_pages - truncate *all* the pages from an offset
  96 * @mapping: mapping to truncate
  97 * @lstart: offset from which to truncate
  98 *
  99 * Truncate the page cache at a set offset, removing the pages that are beyond
 100 * that offset (and zeroing out partial pages).
 101 *
 102 * Truncate takes two passes - the first pass is nonblocking.  It will not
 103 * block on page locks and it will not block on writeback.  The second pass
 104 * will wait.  This is to prevent as much IO as possible in the affected region.
 105 * The first pass will remove most pages, so the search cost of the second pass
 106 * is low.
 107 *
 108 * When looking at page->index outside the page lock we need to be careful to
 109 * copy it into a local to avoid races (it could change at any time).
 110 *
 111 * We pass down the cache-hot hint to the page freeing code.  Even if the
 112 * mapping is large, it is probably the case that the final pages are the most
 113 * recently touched, and freeing happens in ascending file offset order.
 114 *
 115 * Called under (and serialised by) inode->i_sem.
 116 */
 117void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
 118{
 119        const pgoff_t start = (lstart + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
 120        const unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
 121        struct pagevec pvec;
 122        pgoff_t next;
 123        int i;
 124
 125        if (mapping->nrpages == 0)
 126                return;
 127
 128        pagevec_init(&pvec, 0);
 129        next = start;
 130        while (pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
 131                for (i = 0; i < pagevec_count(&pvec); i++) {
 132                        struct page *page = pvec.pages[i];
 133                        pgoff_t page_index = page->index;
 134
 135                        if (page_index > next)
 136                                next = page_index;
 137                        next++;
 138                        if (TestSetPageLocked(page))
 139                                continue;
 140                        if (PageWriteback(page)) {
 141                                unlock_page(page);
 142                                continue;
 143                        }
 144                        truncate_complete_page(mapping, page);
 145                        unlock_page(page);
 146                }
 147                pagevec_release(&pvec);
 148                cond_resched();
 149        }
 150
 151        if (partial) {
 152                struct page *page = find_lock_page(mapping, start - 1);
 153                if (page) {
 154                        wait_on_page_writeback(page);
 155                        truncate_partial_page(page, partial);
 156                        unlock_page(page);
 157                        page_cache_release(page);
 158                }
 159        }
 160
 161        next = start;
 162        for ( ; ; ) {
 163                cond_resched();
 164                if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
 165                        if (next == start)
 166                                break;
 167                        next = start;
 168                        continue;
 169                }
 170                for (i = 0; i < pagevec_count(&pvec); i++) {
 171                        struct page *page = pvec.pages[i];
 172
 173                        lock_page(page);
 174                        wait_on_page_writeback(page);
 175                        if (page->index > next)
 176                                next = page->index;
 177                        next++;
 178                        truncate_complete_page(mapping, page);
 179                        unlock_page(page);
 180                }
 181                pagevec_release(&pvec);
 182        }
 183}
 184
 185EXPORT_SYMBOL(truncate_inode_pages);
 186
 187void invalidate_all_mapping_pages(struct address_space *mapping)
 188{
 189        struct pagevec pvec;
 190        pgoff_t next = 0;
 191        int i;
 192
 193        pagevec_init(&pvec, 0);
 194        while (next <= ~0UL &&
 195                pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
 196                for (i = 0; i < pagevec_count(&pvec); i++) {
 197                        struct page *page = pvec.pages[i];
 198
 199                        if (TestSetPageLocked(page)) {
 200                                next++;
 201                                continue;
 202                        }
 203                        if (page->index > next)
 204                                next = page->index;
 205                        next++;
 206                        if (!PageDirty(page) && !PageWriteback(page) &&
 207                            !page_mapped(page))
 208                                invalidate_complete_page(mapping, page);
 209                        unlock_page(page);
 210                        if (next > ~0UL)
 211                        break;
 212                }
 213                pagevec_release(&pvec);
 214        }
 215        return;
 216}
 217
 218
 219/**
 220 * invalidate_mapping_pages - Invalidate all the unlocked pages of one inode
 221 * @mapping: the address_space which holds the pages to invalidate
 222 * @start: the offset 'from' which to invalidate
 223 * @end: the offset 'to' which to invalidate (inclusive)
 224 *
 225 * This function only removes the unlocked pages, if you want to
 226 * remove all the pages of one inode, you must call truncate_inode_pages.
 227 *
 228 * invalidate_mapping_pages() will not block on IO activity. It will not
 229 * invalidate pages which are dirty, locked, under writeback or mapped into
 230 * pagetables.
 231 */
 232unsigned long invalidate_mapping_pages(struct address_space *mapping,
 233                                pgoff_t start, pgoff_t end)
 234{
 235        struct pagevec pvec;
 236        pgoff_t next = start;
 237        unsigned long ret = 0;
 238        int i;
 239
 240        pagevec_init(&pvec, 0);
 241        while (next <= end &&
 242                        pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
 243                for (i = 0; i < pagevec_count(&pvec); i++) {
 244                        struct page *page = pvec.pages[i];
 245
 246                        if (TestSetPageLocked(page)) {
 247                                next++;
 248                                continue;
 249                        }
 250                        if (page->index > next)
 251                                next = page->index;
 252                        next++;
 253                        if (PageDirty(page) || PageWriteback(page))
 254                                goto unlock;
 255                        if (page_mapped(page))
 256                                goto unlock;
 257                        ret += invalidate_complete_page(mapping, page);
 258unlock:
 259                        unlock_page(page);
 260                        if (next > end)
 261                                break;
 262                }
 263                pagevec_release(&pvec);
 264                cond_resched();
 265        }
 266        return ret;
 267}
 268
 269EXPORT_SYMBOL_GPL(invalidate_mapping_pages);
 270
 271unsigned long invalidate_inode_pages(struct address_space *mapping)
 272{
 273        return invalidate_mapping_pages(mapping, 0, ~0UL);
 274}
 275
 276EXPORT_SYMBOL(invalidate_inode_pages);
 277
 278/**
 279 * invalidate_inode_pages2 - remove all unmapped pages from an address_space
 280 * @mapping - the address_space
 281 *
 282 * invalidate_inode_pages2() is like truncate_inode_pages(), except for the case
 283 * where the page is seen to be mapped into process pagetables.  In that case,
 284 * the page is marked clean but is left attached to its address_space.
 285 *
 286 * The page is also marked not uptodate so that a subsequent pagefault will
 287 * perform I/O to bringthe page's contents back into sync with its backing
 288 * store.
 289 *
 290 * FIXME: invalidate_inode_pages2() is probably trivially livelockable.
 291 */
 292void invalidate_inode_pages2(struct address_space *mapping)
 293{
 294        struct pagevec pvec;
 295        pgoff_t next = 0;
 296        int i;
 297
 298        pagevec_init(&pvec, 0);
 299        while (pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
 300                for (i = 0; i < pagevec_count(&pvec); i++) {
 301                        struct page *page = pvec.pages[i];
 302
 303                        lock_page(page);
 304                        if (page->mapping == mapping) { /* truncate race? */
 305                                wait_on_page_writeback(page);
 306                                next = page->index + 1;
 307                                if (page_mapped(page)) {
 308                                        clear_page_dirty(page);
 309                                        ClearPageUptodate(page);
 310                                } else {
 311                                        if (!invalidate_complete_page(mapping,
 312                                                                      page)) {
 313                                                clear_page_dirty(page);
 314                                                ClearPageUptodate(page);
 315                                        }
 316                                }
 317                        }
 318                        unlock_page(page);
 319                }
 320                pagevec_release(&pvec);
 321                cond_resched();
 322        }
 323}
 324
 325EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
 326
 327/**
 328 * invalidate_inode_pages3_range - remove range of pages from an address_space
 329 * @mapping: the address_space
 330 * @start: the page offset 'from' which to invalidate
 331 * @end: the page offset 'to' which to invalidate (inclusive)
 332 *
 333 * Any pages which are found to be mapped into pagetables are unmapped prior to
 334 * invalidation.
 335 *
 336 * Returns -EIO if any pages could not be invalidated.
 337 */
 338int invalidate_inode_pages3_range(struct address_space *mapping,
 339                                  pgoff_t start, pgoff_t end)
 340{
 341        struct pagevec pvec;
 342        pgoff_t next;
 343        int i;
 344        int ret = 0;
 345        int wrapped = 0;
 346
 347        pagevec_init(&pvec, 0);
 348        next = start;
 349        while (next <= end && !wrapped &&
 350                pagevec_lookup(&pvec, mapping, next,
 351                        min(end - next, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
 352                for (i = 0; i < pagevec_count(&pvec); i++) {
 353                        struct page *page = pvec.pages[i];
 354                        pgoff_t page_index;
 355
 356                        lock_page(page);
 357                        if (page->mapping != mapping) {
 358                                unlock_page(page);
 359                                continue;
 360                        }
 361                        page_index = page->index;
 362                        next = page_index + 1;
 363                        if (next == 0)
 364                                wrapped = 1;
 365                        if (page_index > end) {
 366                                unlock_page(page);
 367                                break;
 368                        }
 369                        wait_on_page_writeback(page);
 370                        while (page_mapped(page)) {
 371                                unmap_mapping_range(mapping,
 372                                  (loff_t)page_index<<PAGE_CACHE_SHIFT,
 373                                  PAGE_CACHE_SIZE, 0);
 374                        }
 375                        if (!invalidate_complete_page(mapping, page))
 376                                ret = -EIO;
 377                        else {
 378                                /*
 379                                 * Update the truncate_count.  This should
 380                                 * prevent any threads in do_no_page()
 381                                 * who found this page in the page cache
 382                                 * from using it.
 383                                 */
 384                                atomic_inc(&mapping->truncate_count);
 385                        }
 386                        unlock_page(page);
 387                }
 388                pagevec_release(&pvec);
 389                cond_resched();
 390        }
 391        return ret;
 392}
 393EXPORT_SYMBOL_GPL(invalidate_inode_pages3_range);
 394
 395/**
 396 * invalidate_inode_pages3 - remove all pages from an address_space
 397 * @mapping: the address_space
 398 *
 399 * Any pages which are found to be mapped into pagetables are unmapped prior to
 400 * invalidation.
 401 *
 402 * Returns -EIO if any pages could not be invalidated.
 403 */
 404int invalidate_inode_pages3(struct address_space *mapping)
 405{
 406        return invalidate_inode_pages3_range(mapping, 0, -1);
 407}
 408EXPORT_SYMBOL_GPL(invalidate_inode_pages3);
 409