RHEL4/mm/shmem.c
<<
>>
Prefs
   1/*
   2 * Resizable virtual memory filesystem for Linux.
   3 *
   4 * Copyright (C) 2000 Linus Torvalds.
   5 *               2000 Transmeta Corp.
   6 *               2000-2001 Christoph Rohland
   7 *               2000-2001 SAP AG
   8 *               2002 Red Hat Inc.
   9 * Copyright (C) 2002-2004 Hugh Dickins.
  10 * Copyright (C) 2002-2004 VERITAS Software Corporation.
  11 * Copyright (C) 2004 Andi Kleen, SuSE Labs
  12 *
  13 * Extended attribute support for tmpfs:
  14 * Copyright (c) 2004, Luke Kenneth Casson Leighton <lkcl@lkcl.net>
  15 * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
  16 *
  17 * This file is released under the GPL.
  18 */
  19
  20/*
  21 * This virtual memory filesystem is heavily based on the ramfs. It
  22 * extends ramfs by the ability to use swap and honor resource limits
  23 * which makes it a completely usable filesystem.
  24 */
  25
  26#include <linux/config.h>
  27#include <linux/module.h>
  28#include <linux/init.h>
  29#include <linux/devfs_fs_kernel.h>
  30#include <linux/fs.h>
  31#include <linux/mm.h>
  32#include <linux/mman.h>
  33#include <linux/file.h>
  34#include <linux/swap.h>
  35#include <linux/pagemap.h>
  36#include <linux/string.h>
  37#include <linux/slab.h>
  38#include <linux/backing-dev.h>
  39#include <linux/shmem_fs.h>
  40#include <linux/mount.h>
  41#include <linux/writeback.h>
  42#include <linux/vfs.h>
  43#include <linux/blkdev.h>
  44#include <linux/security.h>
  45#include <linux/swapops.h>
  46#include <linux/mempolicy.h>
  47#include <linux/namei.h>
  48#include <linux/xattr.h>
  49#include <asm/uaccess.h>
  50#include <asm/div64.h>
  51#include <asm/pgtable.h>
  52
  53/* This magic number is used in glibc for posix shared memory */
  54#define TMPFS_MAGIC     0x01021994
  55
    /*
     * Geometry of the swap vector index pages:
     * ENTRIES_PER_PAGE     - unsigned-long-sized slots in one page cache page
     * ENTRIES_PER_PAGEPAGE - ENTRIES_PER_PAGE squared, i.e. the slots
     *                        reachable through one further level of index page
     * BLOCKS_PER_PAGE      - one page cache page in 512-byte units; this is
     *                        the step by which inode->i_blocks is adjusted
     *                        whenever a page is charged or released
     */
  56#define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long))
  57#define ENTRIES_PER_PAGEPAGE (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
  58#define BLOCKS_PER_PAGE  (PAGE_CACHE_SIZE/512)
  59
    /*
     * SHMEM_MAX_INDEX is the number of page indices the swap vector can
     * address: the direct entries, plus ENTRIES_PER_PAGEPAGE/2 entries behind
     * the doubly indirect half of the i_indirect page, plus
     * ENTRIES_PER_PAGEPAGE/2 * ENTRIES_PER_PAGE entries behind its triple
     * indirect half.  SHMEM_MAX_BYTES is the same limit expressed in bytes.
     */
  60#define SHMEM_MAX_INDEX  (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1))
  61#define SHMEM_MAX_BYTES  ((unsigned long long)SHMEM_MAX_INDEX << PAGE_CACHE_SHIFT)
  62
    /*
     * Convert a byte count into a number of PAGE_SIZE pages for VM
     * accounting, after rounding it up to a whole page cache page.
     */
  63#define VM_ACCT(size)    (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)
  64
  65/* info->flags needs VM_flags to handle pagein/truncate races efficiently */
    /*
     * SHMEM_PAGEIN is set when a page is moved into the page cache (e.g. by
     * shmem_unuse_inode) and cleared by shmem_notify_change before a
     * shrinking truncate; SHMEM_TRUNCATE is held for the duration of
     * shmem_truncate's work under info->lock.
     */
  66#define SHMEM_PAGEIN     VM_READ
  67#define SHMEM_TRUNCATE   VM_WRITE
  68
  69/* Pretend that each entry is of this size in directory's i_size */
  70#define BOGO_DIRENT_SIZE 20
  71
  72/* Keep swapped page count in private field of indirect struct page */
  73#define nr_swapped              private
  74
  75/*
     * Flag allocation requirements to shmem_getpage and shmem_swp_alloc.
     *
     * As used by shmem_swp_alloc: every type except SGP_WRITE is refused
     * with -EINVAL at or beyond i_size, and SGP_READ never allocates an
     * index page (a mapping of the zero page is returned instead).
     */
  76enum sgp_type {
  77        SGP_QUICK,      /* don't try more than file page cache lookup */
  78        SGP_READ,       /* don't exceed i_size, don't allocate page */
  79        SGP_CACHE,      /* don't exceed i_size, may allocate page */
  80        SGP_WRITE,      /* may exceed i_size, may allocate page */
  81};
  82
  83static int shmem_getpage(struct inode *inode, unsigned long idx,
  84                         struct page **pagep, enum sgp_type sgp, int *type);
  85
  86static inline struct page *shmem_dir_alloc(unsigned int gfp_mask)
  87{
  88        /*
  89         * The above definition of ENTRIES_PER_PAGE, and the use of
  90         * BLOCKS_PER_PAGE on indirect pages, assume PAGE_CACHE_SIZE:
  91         * might be reconsidered if it ever diverges from PAGE_SIZE.
  92         */
  93        return alloc_pages(gfp_mask, PAGE_CACHE_SHIFT-PAGE_SHIFT);
  94}
  95
  96static inline void shmem_dir_free(struct page *page)
  97{
  98        __free_pages(page, PAGE_CACHE_SHIFT-PAGE_SHIFT);
  99}
 100
 101static struct page **shmem_dir_map(struct page *page)
 102{
 103        return (struct page **)kmap_atomic(page, KM_USER0);
 104}
 105
 106static inline void shmem_dir_unmap(struct page **dir)
 107{
            /*
             * Drop the KM_USER0 atomic mapping taken by shmem_dir_map().
             * Callers pass whatever position they have walked to, e.g.
             * dir-1 after a loop has already advanced the pointer.
             */
 108        kunmap_atomic(dir, KM_USER0);
 109}
 110
 111static swp_entry_t *shmem_swp_map(struct page *page)
 112{
 113        return (swp_entry_t *)kmap_atomic(page, KM_USER1);
 114}
 115
 116static inline void shmem_swp_balance_unmap(void)
 117{
 118        /*
 119         * When passing a pointer to an i_direct entry, to code which
 120         * also handles indirect entries and so will shmem_swp_unmap,
 121         * we must arrange for the preempt count to remain in balance.
 122         * What kmap_atomic of a lowmem page does depends on config
 123         * and architecture, so pretend to kmap_atomic some lowmem page.
 124         */
 125        (void) kmap_atomic(ZERO_PAGE(0), KM_USER1);
 126}
 127
 128static inline void shmem_swp_unmap(swp_entry_t *entry)
 129{
            /*
             * Drop the KM_USER1 atomic mapping taken by shmem_swp_map(), or
             * the dummy one taken by shmem_swp_balance_unmap() when the
             * entry lives in i_direct.
             */
 130        kunmap_atomic(entry, KM_USER1);
 131}
 132
 133static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
 134{
 135        return sb->s_fs_info;
 136}
 137
 138/*
 139 * shmem_file_setup pre-accounts the whole fixed size of a VM object,
 140 * for shared memory and for shared anonymous (/dev/zero) mappings
 141 * (unless MAP_NORESERVE and sysctl_overcommit_memory <= 1),
 142 * consistent with the pre-accounting of private mappings ...
 143 */
 144static inline int shmem_acct_size(unsigned long flags, loff_t size)
 145{
 146        return (flags & VM_ACCOUNT)?
 147                security_vm_enough_memory(VM_ACCT(size)): 0;
 148}
 149
 150static inline void shmem_unacct_size(unsigned long flags, loff_t size)
 151{
 152        if (flags & VM_ACCOUNT)
 153                vm_unacct_memory(VM_ACCT(size));
 154}
 155
 156/*
 157 * ... whereas tmpfs objects are accounted incrementally as
 158 * pages are allocated, in order to allow huge sparse files.
 159 * shmem_getpage reports shmem_acct_block failure as -ENOSPC not -ENOMEM,
 160 * so that a failure on a sparse tmpfs mapping will give SIGBUS not OOM.
 161 */
 162static inline int shmem_acct_block(unsigned long flags)
 163{
 164        return (flags & VM_ACCOUNT)?
 165                0: security_vm_enough_memory(VM_ACCT(PAGE_CACHE_SIZE));
 166}
 167
 168static inline void shmem_unacct_blocks(unsigned long flags, long pages)
 169{
 170        if (!(flags & VM_ACCOUNT))
 171                vm_unacct_memory(pages * VM_ACCT(PAGE_CACHE_SIZE));
 172}
 173
 /*
  * Forward declarations of the operation tables; their initialisers are
  * not in this part of the file.
  */
 174static struct super_operations shmem_ops;
 175static struct address_space_operations shmem_aops;
 176static struct file_operations shmem_file_operations;
 177static struct inode_operations shmem_inode_operations;
 178static struct inode_operations shmem_dir_inode_operations;
 179static struct inode_operations shmem_special_inode_operations;
 180static struct vm_operations_struct shmem_vm_ops;
 181
 182static struct backing_dev_info shmem_backing_dev_info = {
 183        .ra_pages       = 0,    /* No readahead */
 184        .memory_backed  = 1,    /* Does not contribute to dirty memory */
 185        .unplug_io_fn = default_unplug_io_fn,
 186};
 187
    /*
     * shmem_swaplist links the inodes that may have pages out on swap:
     * shmem_writepage puts an inode on it, shmem_unuse walks it (dropping
     * inodes with nothing swapped) and shmem_delete_inode takes the inode
     * off.  All list changes are made under shmem_swaplist_lock.
     */
 188static LIST_HEAD(shmem_swaplist);
 189static spinlock_t shmem_swaplist_lock = SPIN_LOCK_UNLOCKED;
 190
 191static void shmem_free_block(struct inode *inode)
 192{
 193        struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 194        if (sbinfo) {
 195                spin_lock(&sbinfo->stat_lock);
 196                sbinfo->free_blocks++;
 197                inode->i_blocks -= BLOCKS_PER_PAGE;
 198                spin_unlock(&sbinfo->stat_lock);
 199        }
 200}
 201
 202/*
 203 * shmem_recalc_inode - recalculate the size of an inode
 204 *
 205 * @inode: inode to recalc
 206 *
 207 * We have to calculate the free blocks since the mm can drop
 208 * undirtied hole pages behind our back.
 209 *
 210 * But normally   info->alloced == inode->i_mapping->nrpages + info->swapped
 211 * So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped)
 212 *
 213 * It has to be called with the spinlock held.
 214 */
 215static void shmem_recalc_inode(struct inode *inode)
 216{
 217        struct shmem_inode_info *info = SHMEM_I(inode);
 218        long freed;
 219
 220        freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
 221        if (freed > 0) {
 222                struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 223                info->alloced -= freed;
 224                shmem_unacct_blocks(info->flags, freed);
 225                if (sbinfo) {
 226                        spin_lock(&sbinfo->stat_lock);
 227                        sbinfo->free_blocks += freed;
 228                        inode->i_blocks -= freed*BLOCKS_PER_PAGE;
 229                        spin_unlock(&sbinfo->stat_lock);
 230                }
 231        }
 232}
 233
 234/*
 235 * shmem_swp_entry - find the swap vector position in the info structure
 236 *
 237 * @info:  info structure for the inode
 238 * @index: index of the page to find
 239 * @page:  optional page to add to the structure. Has to be preset to
 240 *         all zeros
 241 *
 242 * If there is no space allocated yet it will return NULL when
 243 * page is NULL, else it will use the page for the needed block,
 244 * setting it to NULL on return to indicate that it has been used.
     * When the page is consumed for i_indirect or for a triple indirect
     * directory, NULL is still returned: the caller supplies another page
     * and calls again (see the loop in shmem_swp_alloc).
     *
     * On a non-NULL return a KM_USER1 atomic mapping is held (a real one
     * for indirect entries, a balancing dummy for i_direct entries) and
     * the caller must release it with shmem_swp_unmap().  On a NULL
     * return no mapping is left held.
 245 *
 246 * The swap vector is organized the following way:
 247 *
 248 * There are SHMEM_NR_DIRECT entries directly stored in the
 249 * shmem_inode_info structure. So small files do not need an additional
 250 * allocation.
 251 *
 252 * For pages with index >= SHMEM_NR_DIRECT there is the pointer
 253 * i_indirect which points to a page which holds in the first half
 254 * doubly indirect blocks, in the second half triple indirect blocks:
 255 *
 256 * For an artificial ENTRIES_PER_PAGE = 4 this would lead to the
 257 * following layout (for SHMEM_NR_DIRECT == 16):
 258 *
 259 * i_indirect -> dir --> 16-19
 260 *            |      +-> 20-23
 261 *            |
 262 *            +-->dir2 --> 24-27
 263 *            |        +-> 28-31
 264 *            |        +-> 32-35
 265 *            |        +-> 36-39
 266 *            |
 267 *            +-->dir3 --> 40-43
 268 *                     +-> 44-47
 269 *                     +-> 48-51
 270 *                     +-> 52-55
 271 */
 272static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long index, struct page **page)
 273{
 274        unsigned long offset;
 275        struct page **dir;
 276        struct page *subdir;
 277
            /* Direct entry: no real mapping needed, but keep kmap balance. */
 278        if (index < SHMEM_NR_DIRECT) {
 279                shmem_swp_balance_unmap();
 280                return info->i_direct+index;
 281        }
            /* No top-level index page yet: adopt the caller's, if offered. */
 282        if (!info->i_indirect) {
 283                if (page) {
 284                        info->i_indirect = *page;
 285                        *page = NULL;
 286                }
 287                return NULL;                    /* need another page */
 288        }
 289
            /*
             * Split the indirect-relative index into the slot within the
             * final swap-vector page (offset) and the number of that page.
             */
 290        index -= SHMEM_NR_DIRECT;
 291        offset = index % ENTRIES_PER_PAGE;
 292        index /= ENTRIES_PER_PAGE;
 293        dir = shmem_dir_map(info->i_indirect);
 294
            /*
             * Second half of the i_indirect page: triple indirect.  Step
             * through the intermediate directory, installing the caller's
             * page there if the slot is still empty.
             */
 295        if (index >= ENTRIES_PER_PAGE/2) {
 296                index -= ENTRIES_PER_PAGE/2;
 297                dir += ENTRIES_PER_PAGE/2 + index/ENTRIES_PER_PAGE;
 298                index %= ENTRIES_PER_PAGE;
 299                subdir = *dir;
 300                if (!subdir) {
 301                        if (page) {
 302                                *dir = *page;
 303                                *page = NULL;
 304                        }
 305                        shmem_dir_unmap(dir);
 306                        return NULL;            /* need another page */
 307                }
 308                shmem_dir_unmap(dir);
 309                dir = shmem_dir_map(subdir);
 310        }
 311
            /* dir now addresses the slot holding the swap-vector page. */
 312        dir += index;
 313        subdir = *dir;
 314        if (!subdir) {
 315                if (!page || !(subdir = *page)) {
 316                        shmem_dir_unmap(dir);
 317                        return NULL;            /* need a page */
 318                }
 319                *dir = subdir;
 320                *page = NULL;
 321        }
 322        shmem_dir_unmap(dir);
 323        return shmem_swp_map(subdir) + offset;
 324}
 325
 326static void shmem_swp_set(struct shmem_inode_info *info, swp_entry_t *entry, unsigned long value)
 327{
 328        long incdec = value? 1: -1;
 329
 330        entry->val = value;
 331        info->swapped += incdec;
 332        if ((unsigned long)(entry - info->i_direct) >= SHMEM_NR_DIRECT)
 333                kmap_atomic_to_page(entry)->nr_swapped += incdec;
 334}
 335
 336/*
 337 * shmem_swp_alloc - get the position of the swap entry for the page.
 338 *                   If it does not exist allocate the entry.
 339 *
 340 * @info:       info structure for the inode
 341 * @index:      index of the page to find
 342 * @sgp:        check and recheck i_size? skip allocation?
     *
     * Entered and left with info->lock held; the lock is dropped and
     * retaken around each index page allocation.
     *
     * Returns the entry (with the KM_USER1 mapping from shmem_swp_entry
     * held), or ERR_PTR(-EINVAL) when index is at or beyond i_size and
     * sgp is not SGP_WRITE, ERR_PTR(-ENOSPC) when the superblock has no
     * more than one free block left, or ERR_PTR(-ENOMEM) when an index
     * page cannot be allocated.  For SGP_READ a missing index page is not
     * allocated: a KM_USER1 mapping of ZERO_PAGE(0) is returned instead.
 343 */
 344static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long index, enum sgp_type sgp)
 345{
 346        struct inode *inode = &info->vfs_inode;
 347        struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 348        struct page *page = NULL;
 349        swp_entry_t *entry;
 350
 351        if (sgp != SGP_WRITE &&
 352            ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode))
 353                return ERR_PTR(-EINVAL);
 354
            /*
             * Each pass offers the page allocated on the previous pass;
             * shmem_swp_entry clears "page" when it has taken it.
             */
 355        while (!(entry = shmem_swp_entry(info, index, &page))) {
 356                if (sgp == SGP_READ)
 357                        return shmem_swp_map(ZERO_PAGE(0));
 358                /*
 359                 * Test free_blocks against 1 not 0, since we have 1 data
 360                 * page (and perhaps indirect index pages) yet to allocate:
 361                 * a waste to allocate index if we cannot allocate data.
 362                 */
 363                if (sbinfo) {
 364                        spin_lock(&sbinfo->stat_lock);
 365                        if (sbinfo->free_blocks <= 1) {
 366                                spin_unlock(&sbinfo->stat_lock);
 367                                return ERR_PTR(-ENOSPC);
 368                        }
 369                        sbinfo->free_blocks--;
 370                        inode->i_blocks += BLOCKS_PER_PAGE;
 371                        spin_unlock(&sbinfo->stat_lock);
 372                }
 373
                    /* Allocate and zero the index page without info->lock. */
 374                spin_unlock(&info->lock);
 375                page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping));
 376                if (page) {
 377                        clear_highpage(page);
 378                        page->nr_swapped = 0;
 379                }
 380                spin_lock(&info->lock);
 381
 382                if (!page) {
                            /* undo the block charged above */
 383                        shmem_free_block(inode);
 384                        return ERR_PTR(-ENOMEM);
 385                }
                    /* i_size may have changed while the lock was dropped. */
 386                if (sgp != SGP_WRITE &&
 387                    ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
 388                        entry = ERR_PTR(-EINVAL);
 389                        break;
 390                }
 391                if (info->next_index <= index)
 392                        info->next_index = index + 1;
 393        }
 394        if (page) {
 395                /* another task gave its page, or truncated the file */
 396                shmem_free_block(inode);
 397                shmem_dir_free(page);
 398        }
 399        if (info->next_index <= index && !IS_ERR(entry))
 400                info->next_index = index + 1;
 401        return entry;
 402}
 403
 404/*
 405 * shmem_free_swp - free some swap entries in a directory
 406 *
 407 * @dir:   pointer to the directory
 408 * @edir:  pointer after last entry of the directory
 409 */
 410static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir)
 411{
 412        swp_entry_t *ptr;
 413        int freed = 0;
 414
 415        for (ptr = dir; ptr < edir; ptr++) {
 416                if (ptr->val) {
 417                        free_swap_and_cache(*ptr);
 418                        *ptr = (swp_entry_t){0};
 419                        freed++;
 420                }
 421        }
 422        return freed;
 423}
 424
 /*
  * shmem_truncate - drop swap entries and index pages beyond i_size
  *
  * @inode: inode to trim; the new size is read from inode->i_size
  *
  * Stamps ctime/mtime, then, under info->lock with SHMEM_TRUNCATE set,
  * lowers info->next_index to the first page index past i_size and frees
  * every swap entry from there up to the old next_index, together with
  * the index pages that are no longer needed (each one also returned via
  * shmem_free_block).  Finally it re-runs truncate_inode_pages if pages
  * may have entered the page cache meanwhile, and recalculates the
  * inode's block accounting.
  */
 425static void shmem_truncate(struct inode *inode)
 426{
 427        struct shmem_inode_info *info = SHMEM_I(inode);
 428        unsigned long idx;
 429        unsigned long size;
 430        unsigned long limit;
 431        unsigned long stage;
 432        struct page **dir;
 433        struct page *subdir;
 434        struct page *empty;
 435        swp_entry_t *ptr;
 436        int offset;
 437        int freed;
 438
 439        inode->i_ctime = inode->i_mtime = CURRENT_TIME;
            /* First page index lying wholly beyond the new size. */
 440        idx = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 441        if (idx >= info->next_index)
 442                return;
 443
 444        spin_lock(&info->lock);
 445        info->flags |= SHMEM_TRUNCATE;
 446        limit = info->next_index;
 447        info->next_index = idx;
            /* Free swap held by direct entries from idx up to the old limit. */
 448        if (info->swapped && idx < SHMEM_NR_DIRECT) {
 449                ptr = info->i_direct;
 450                size = limit;
 451                if (size > SHMEM_NR_DIRECT)
 452                        size = SHMEM_NR_DIRECT;
 453                info->swapped -= shmem_free_swp(ptr+idx, ptr+size);
 454        }
 455        if (!info->i_indirect)
 456                goto done2;
 457
            /*
             * From here idx and limit are relative to the start of the
             * indirect area.  idx is rounded down to a swap-vector page
             * boundary, the remainder being kept in offset.
             */
 458        BUG_ON(limit <= SHMEM_NR_DIRECT);
 459        limit -= SHMEM_NR_DIRECT;
 460        idx = (idx > SHMEM_NR_DIRECT)? (idx - SHMEM_NR_DIRECT): 0;
 461        offset = idx % ENTRIES_PER_PAGE;
 462        idx -= offset;
 463
            /*
             * empty remembers a triple indirect directory already unhooked
             * from i_indirect, to be freed once we are done walking it.
             * stage is the idx at which the walk must move on to the next
             * slot in the second half of the i_indirect page.
             */
 464        empty = NULL;
 465        dir = shmem_dir_map(info->i_indirect);
 466        stage = ENTRIES_PER_PAGEPAGE/2;
 467        if (idx < ENTRIES_PER_PAGEPAGE/2)
 468                dir += idx/ENTRIES_PER_PAGE;
 469        else {
 470                dir += ENTRIES_PER_PAGE/2;
 471                dir += (idx - ENTRIES_PER_PAGEPAGE/2)/ENTRIES_PER_PAGEPAGE;
 472                while (stage <= idx)
 473                        stage += ENTRIES_PER_PAGEPAGE;
 474                if (*dir) {
 475                        subdir = *dir;
 476                        size = ((idx - ENTRIES_PER_PAGEPAGE/2) %
 477                                ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE;
                            /*
                             * Starting exactly at the head of this
                             * directory: nothing in it survives, so
                             * unhook it now and free it later via empty.
                             */
 478                        if (!size && !offset) {
 479                                empty = subdir;
 480                                *dir = NULL;
 481                        }
 482                        shmem_dir_unmap(dir);
 483                        dir = shmem_dir_map(subdir) + size;
 484                } else {
                            /* Nothing under this slot: resume at the next. */
 485                        offset = 0;
 486                        idx = stage;
 487                }
 488        }
 489
            /* One iteration per swap-vector page. */
 490        for (; idx < limit; idx += ENTRIES_PER_PAGE, dir++) {
 491                if (unlikely(idx == stage)) {
                            /*
                             * Finished with the current directory: go back
                             * to i_indirect and find the next populated
                             * triple indirect slot, skipping empty ones.
                             */
 492                        shmem_dir_unmap(dir-1);
 493                        dir = shmem_dir_map(info->i_indirect) +
 494                            ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE;
 495                        while (!*dir) {
 496                                dir++;
 497                                idx += ENTRIES_PER_PAGEPAGE;
 498                                if (idx >= limit)
 499                                        goto done1;
 500                        }
 501                        stage = idx + ENTRIES_PER_PAGEPAGE;
 502                        subdir = *dir;
 503                        *dir = NULL;
 504                        shmem_dir_unmap(dir);
 505                        if (empty) {
 506                                shmem_dir_free(empty);
 507                                shmem_free_block(inode);
 508                        }
 509                        empty = subdir;
 510                        cond_resched_lock(&info->lock);
 511                        dir = shmem_dir_map(subdir);
 512                }
 513                subdir = *dir;
                    /*
                     * Free this page's swap entries from offset (non-zero
                     * only for the first, partially kept page) up to the
                     * end of the page or the old limit.
                     */
 514                if (subdir && subdir->nr_swapped) {
 515                        ptr = shmem_swp_map(subdir);
 516                        size = limit - idx;
 517                        if (size > ENTRIES_PER_PAGE)
 518                                size = ENTRIES_PER_PAGE;
 519                        freed = shmem_free_swp(ptr+offset, ptr+size);
 520                        shmem_swp_unmap(ptr);
 521                        info->swapped -= freed;
 522                        subdir->nr_swapped -= freed;
 523                        BUG_ON(subdir->nr_swapped > offset);
 524                }
                    /*
                     * A partially kept page stays in place; any other page
                     * is wholly beyond the new size and is freed.
                     */
 525                if (offset)
 526                        offset = 0;
 527                else if (subdir) {
 528                        *dir = NULL;
 529                        shmem_dir_free(subdir);
 530                        shmem_free_block(inode);
 531                }
 532        }
 533done1:
            /* dir has been advanced one past the last slot visited. */
 534        shmem_dir_unmap(dir-1);
 535        if (empty) {
 536                shmem_dir_free(empty);
 537                shmem_free_block(inode);
 538        }
            /* Everything left fits in i_direct: drop the top index page. */
 539        if (info->next_index <= SHMEM_NR_DIRECT) {
 540                shmem_dir_free(info->i_indirect);
 541                info->i_indirect = NULL;
 542                shmem_free_block(inode);
 543        }
 544done2:
 545        BUG_ON(info->swapped > info->next_index);
 546        if (inode->i_mapping->nrpages && (info->flags & SHMEM_PAGEIN)) {
 547                /*
 548                 * Call truncate_inode_pages again: racing shmem_unuse_inode
 549                 * may have swizzled a page in from swap since vmtruncate or
 550                 * generic_delete_inode did it, before we lowered next_index.
 551                 * Also, though shmem_getpage checks i_size before adding to
 552                 * cache, no recheck after: so fix the narrow window there too.
 553                 */
 554                spin_unlock(&info->lock);
 555                truncate_inode_pages(inode->i_mapping, inode->i_size);
 556                spin_lock(&info->lock);
 557        }
 558        info->flags &= ~SHMEM_TRUNCATE;
 559        shmem_recalc_inode(inode);
 560        spin_unlock(&info->lock);
 561}
 562
 563static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
 564{
 565        struct inode *inode = dentry->d_inode;
 566        struct page *page = NULL;
 567        int error;
 568
 569        if (attr->ia_valid & ATTR_SIZE) {
 570                if (attr->ia_size < inode->i_size) {
 571                        /*
 572                         * If truncating down to a partial page, then
 573                         * if that page is already allocated, hold it
 574                         * in memory until the truncation is over, so
 575                         * truncate_partial_page cannnot miss it were
 576                         * it assigned to swap.
 577                         */
 578                        if (attr->ia_size & (PAGE_CACHE_SIZE-1)) {
 579                                (void) shmem_getpage(inode,
 580                                        attr->ia_size>>PAGE_CACHE_SHIFT,
 581                                                &page, SGP_READ, NULL);
 582                        }
 583                        /*
 584                         * Reset SHMEM_PAGEIN flag so that shmem_truncate can
 585                         * detect if any pages might have been added to cache
 586                         * after truncate_inode_pages.  But we needn't bother
 587                         * if it's being fully truncated to zero-length: the
 588                         * nrpages check is efficient enough in that case.
 589                         */
 590                        if (attr->ia_size) {
 591                                struct shmem_inode_info *info = SHMEM_I(inode);
 592                                spin_lock(&info->lock);
 593                                info->flags &= ~SHMEM_PAGEIN;
 594                                spin_unlock(&info->lock);
 595                        }
 596                }
 597        }
 598
 599        error = inode_change_ok(inode, attr);
 600        if (!error)
 601                error = inode_setattr(inode, attr);
 602        if (page)
 603                page_cache_release(page);
 604        return error;
 605}
 606
 607static void shmem_delete_inode(struct inode *inode)
 608{
 609        struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 610        struct shmem_inode_info *info = SHMEM_I(inode);
 611
 612        if (inode->i_op->truncate == shmem_truncate) {
 613                shmem_unacct_size(info->flags, inode->i_size);
 614                inode->i_size = 0;
 615                shmem_truncate(inode);
 616                if (!list_empty(&info->swaplist)) {
 617                        spin_lock(&shmem_swaplist_lock);
 618                        list_del_init(&info->swaplist);
 619                        spin_unlock(&shmem_swaplist_lock);
 620                }
 621        }
 622        if (sbinfo) {
 623                BUG_ON(inode->i_blocks);
 624                spin_lock(&sbinfo->stat_lock);
 625                sbinfo->free_inodes++;
 626                spin_unlock(&sbinfo->stat_lock);
 627        }
 628        clear_inode(inode);
 629}
 630
 631static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_t *edir)
 632{
 633        swp_entry_t *ptr;
 634
 635        for (ptr = dir; ptr < edir; ptr++) {
 636                if (ptr->val == entry.val)
 637                        return ptr - dir;
 638        }
 639        return -1;
 640}
 641
 /*
  * shmem_unuse_inode - look for a swap entry in one inode's swap vector
  *
  * @info:  inode to search
  * @entry: swap entry being looked for
  * @page:  the page for that entry, handed to move_from_swap_cache()
  *
  * Searches the direct entries and then the index pages, up to
  * info->next_index, under info->lock.  Returns 0 if the entry is not
  * here.  If it is, tries to move the page from the swap cache into the
  * inode's mapping at the matching index, clearing the swap-vector slot
  * when that succeeds, then drops a swap reference and returns 1 (even
  * if the move failed and the entry was left behind).
  */
 642static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
 643{
 644        struct inode *inode;
 645        unsigned long idx;
 646        unsigned long size;
 647        unsigned long limit;
 648        unsigned long stage;
 649        struct page **dir;
 650        struct page *subdir;
 651        swp_entry_t *ptr;
 652        int offset;
 653
            /* Direct entries first: idx 0, ptr at i_direct. */
 654        idx = 0;
 655        ptr = info->i_direct;
 656        spin_lock(&info->lock);
 657        limit = info->next_index;
 658        size = limit;
 659        if (size > SHMEM_NR_DIRECT)
 660                size = SHMEM_NR_DIRECT;
 661        offset = shmem_find_swp(entry, ptr, ptr+size);
 662        if (offset >= 0) {
                    /* "found" ends with shmem_swp_unmap: balance it. */
 663                shmem_swp_balance_unmap();
 664                goto found;
 665        }
 666        if (!info->i_indirect)
 667                goto lost2;
 668        /* we might be racing with shmem_truncate */
 669        if (limit <= SHMEM_NR_DIRECT)
 670                goto lost2;
 671
            /*
             * Walk the index pages one swap-vector page at a time.  Unlike
             * in shmem_truncate, idx here still includes SHMEM_NR_DIRECT.
             * stage is the idx at which to move on to the next slot in
             * the triple indirect half of the i_indirect page.
             */
 672        dir = shmem_dir_map(info->i_indirect);
 673        stage = SHMEM_NR_DIRECT + ENTRIES_PER_PAGEPAGE/2;
 674
 675        for (idx = SHMEM_NR_DIRECT; idx < limit; idx += ENTRIES_PER_PAGE, dir++) {
 676                if (unlikely(idx == stage)) {
 677                        shmem_dir_unmap(dir-1);
 678                        dir = shmem_dir_map(info->i_indirect) +
 679                            ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE;
                            /* skip triple indirect slots with nothing below */
 680                        while (!*dir) {
 681                                dir++;
 682                                idx += ENTRIES_PER_PAGEPAGE;
 683                                if (idx >= limit)
 684                                        goto lost1;
 685                        }
 686                        stage = idx + ENTRIES_PER_PAGEPAGE;
 687                        subdir = *dir;
 688                        shmem_dir_unmap(dir);
 689                        dir = shmem_dir_map(subdir);
 690                }
 691                subdir = *dir;
                    /* Only pages that hold swap entries are worth mapping. */
 692                if (subdir && subdir->nr_swapped) {
 693                        ptr = shmem_swp_map(subdir);
 694                        size = limit - idx;
 695                        if (size > ENTRIES_PER_PAGE)
 696                                size = ENTRIES_PER_PAGE;
 697                        offset = shmem_find_swp(entry, ptr, ptr+size);
 698                        if (offset >= 0) {
                                    /* keep ptr mapped for "found" */
 699                                shmem_dir_unmap(dir);
 700                                goto found;
 701                        }
 702                        shmem_swp_unmap(ptr);
 703                }
 704        }
 705lost1:
 706        shmem_dir_unmap(dir-1);
 707lost2:
 708        spin_unlock(&info->lock);
 709        return 0;
 710found:
            /* idx becomes the page index of the matching entry. */
 711        idx += offset;
 712        inode = &info->vfs_inode;
 713        if (move_from_swap_cache(page, idx, inode->i_mapping) == 0) {
 714                info->flags |= SHMEM_PAGEIN;
 715                shmem_swp_set(info, ptr + offset, 0);
 716        }
 717        shmem_swp_unmap(ptr);
 718        spin_unlock(&info->lock);
 719        /*
 720         * Decrement swap count even when the entry is left behind:
 721         * try_to_unuse will skip over mms, then reincrement count.
 722         */
 723        swap_free(entry);
 724        return 1;
 725}
 726
 727/*
 728 * shmem_unuse() search for an eventually swapped out shmem page.
 729 */
 730int shmem_unuse(swp_entry_t entry, struct page *page)
 731{
 732        struct list_head *p, *next;
 733        struct shmem_inode_info *info;
 734        int found = 0;
 735
 736        spin_lock(&shmem_swaplist_lock);
 737        list_for_each_safe(p, next, &shmem_swaplist) {
 738                info = list_entry(p, struct shmem_inode_info, swaplist);
 739                if (!info->swapped)
 740                        list_del_init(&info->swaplist);
 741                else if (shmem_unuse_inode(info, entry, page)) {
 742                        /* move head to start search for next from here */
 743                        list_move_tail(&shmem_swaplist, &info->swaplist);
 744                        found = 1;
 745                        break;
 746                }
 747        }
 748        spin_unlock(&shmem_swaplist_lock);
 749        return found;
 750}
 751
 752/*
 753 * Move the page from the page cache to the swap cache.
 754 */
 755static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 756{
 757        struct shmem_inode_info *info;
 758        swp_entry_t *entry, swap;
 759        struct address_space *mapping;
 760        unsigned long index;
 761        struct inode *inode;
 762
 763        BUG_ON(!PageLocked(page));
 764        BUG_ON(page_mapped(page));
 765
 766        mapping = page->mapping;
 767        index = page->index;
 768        inode = mapping->host;
 769        info = SHMEM_I(inode);
 770        if (info->flags & VM_LOCKED)
 771                goto redirty;
 772        swap = get_swap_page();
 773        if (!swap.val)
 774                goto redirty;
 775
 776        spin_lock(&info->lock);
 777        shmem_recalc_inode(inode);
 778        if (index >= info->next_index) {
 779                BUG_ON(!(info->flags & SHMEM_TRUNCATE));
 780                goto unlock;
 781        }
 782        entry = shmem_swp_entry(info, index, NULL);
 783        BUG_ON(!entry);
 784        BUG_ON(entry->val);
 785
 786        if (move_to_swap_cache(page, swap) == 0) {
 787                shmem_swp_set(info, entry, swap.val);
 788                shmem_swp_unmap(entry);
 789                spin_unlock(&info->lock);
 790                if (list_empty(&info->swaplist)) {
 791                        spin_lock(&shmem_swaplist_lock);
 792                        /* move instead of add in case we're racing */
 793                        list_move_tail(&info->swaplist, &shmem_swaplist);
 794                        spin_unlock(&shmem_swaplist_lock);
 795                }
 796                unlock_page(page);
 797                return 0;
 798        }
 799
 800        shmem_swp_unmap(entry);
 801unlock:
 802        spin_unlock(&info->lock);
 803        swap_free(swap);
 804redirty:
 805        set_page_dirty(page);
 806        return WRITEPAGE_ACTIVATE;      /* Return with the page locked */
 807}
 808
 809#ifdef CONFIG_NUMA
 810static struct page *shmem_swapin_async(struct shared_policy *p,
 811                                       swp_entry_t entry, unsigned long idx)
 812{
 813        struct page *page;
 814        struct vm_area_struct pvma;
 815
 816        /* Create a pseudo vma that just contains the policy */
 817        memset(&pvma, 0, sizeof(struct vm_area_struct));
 818        pvma.vm_end = PAGE_SIZE;
 819        pvma.vm_pgoff = idx;
 820        pvma.vm_policy = mpol_shared_policy_lookup(p, idx);
 821        page = read_swap_cache_async(entry, &pvma, 0);
 822        mpol_free(pvma.vm_policy);
 823        return page;
 824}
 825
 826struct page *shmem_swapin(struct shmem_inode_info *info, swp_entry_t entry,
 827                          unsigned long idx)
 828{
 829        struct shared_policy *p = &info->policy;
 830        int i, num;
 831        struct page *page;
 832        unsigned long offset;
 833
 834        num = valid_swaphandles(entry, &offset);
 835        for (i = 0; i < num; offset++, i++) {
 836                page = shmem_swapin_async(p,
 837                                swp_entry(swp_type(entry), offset), idx);
 838                if (!page)
 839                        break;
 840                page_cache_release(page);
 841        }
 842        lru_add_drain();        /* Push any new pages onto the LRU now */
 843        return shmem_swapin_async(p, entry, idx);
 844}
 845
 846static struct page *
 847shmem_alloc_page(unsigned long gfp, struct shmem_inode_info *info,
 848                 unsigned long idx)
 849{
 850        struct vm_area_struct pvma;
 851        struct page *page;
 852
 853        memset(&pvma, 0, sizeof(struct vm_area_struct));
 854        pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx);
 855        pvma.vm_pgoff = idx;
 856        pvma.vm_end = PAGE_SIZE;
 857        page = alloc_page_vma(gfp, &pvma, 0);
 858        mpol_free(pvma.vm_policy);
 859        return page;
 860}
 861#else
 862static inline struct page *
 863shmem_swapin(struct shmem_inode_info *info,swp_entry_t entry,unsigned long idx)
 864{
 865        swapin_readahead(entry, 0, NULL);
 866        return read_swap_cache_async(entry, NULL, 0);
 867}
 868
 869static inline struct page *
 870shmem_alloc_page(unsigned long gfp,struct shmem_inode_info *info,
 871                                 unsigned long idx)
 872{
 873        return alloc_page(gfp);
 874}
 875#endif
 876
 877/*
 878 * shmem_getpage - either get the page from swap or allocate a new one
 879 *
 880 * If we allocate a new one we do not mark it dirty. That's up to the
 881 * vm. If we swap it in we mark it dirty since we also free the swap
 882 * entry since a page cannot live in both the swap and page cache
     *
     * @inode: inode whose mapping (inode->i_mapping) is searched/populated
     * @idx:   page index within the file; rejected with -EFBIG when it is
     *         >= SHMEM_MAX_INDEX
     * @pagep: in/out.  A non-NULL *pagep on entry is used as the file page
     *         and is never unlocked or released here.  Otherwise the page
     *         found or created here is unlocked and stored in *pagep.
     * @sgp:   lookup mode.  SGP_QUICK gives up, returning 0 with *pagep
     *         untouched, as soon as no uptodate page is found in the page
     *         cache.  SGP_READ does not allocate a page when there is no
     *         swap entry for @idx.
     * @type:  optional; when non-NULL and currently VM_FAULT_MINOR it is
     *         changed to VM_FAULT_MAJOR if the page must be read from swap.
     *
     * Returns 0 on success (*pagep can still be NULL for SGP_QUICK and
     * SGP_READ), or a negative errno: -EFBIG, -EIO, -ENOSPC, -ENOMEM, or
     * the error encoded in the pointer returned by shmem_swp_alloc().
 883 */
 884static int shmem_getpage(struct inode *inode, unsigned long idx,
 885                        struct page **pagep, enum sgp_type sgp, int *type)
 886{
 887        struct address_space *mapping = inode->i_mapping;
 888        struct shmem_inode_info *info = SHMEM_I(inode);
 889        struct shmem_sb_info *sbinfo;
 890        struct page *filepage = *pagep;
 891        struct page *swappage;
 892        swp_entry_t *entry;
 893        swp_entry_t swap;
 894        int error;
 895
 896        if (idx >= SHMEM_MAX_INDEX)
 897                return -EFBIG;
 898        /*
 899         * Normally, filepage is NULL on entry, and either found
 900         * uptodate immediately, or allocated and zeroed, or read
 901         * in under swappage, which is then assigned to filepage.
 902         * But shmem_prepare_write passes in a locked filepage,
 903         * which may be found not uptodate by other callers too,
 904         * and may need to be copied from the swappage read in.
 905         */
 906repeat:
 907        if (!filepage)
 908                filepage = find_lock_page(mapping, idx);
 909        if (filepage && PageUptodate(filepage))
 910                goto done;
            /*
             * error is cleared before the SGP_QUICK exit, so that exit
             * returns 0 through the "failed" label without a page.
             */
 911        error = 0;
 912        if (sgp == SGP_QUICK)
 913                goto failed;
 914
            /* Fetch the swap entry slot for idx while holding info->lock. */
 915        spin_lock(&info->lock);
 916        shmem_recalc_inode(inode);
 917        entry = shmem_swp_alloc(info, idx, sgp);
 918        if (IS_ERR(entry)) {
 919                spin_unlock(&info->lock);
 920                error = PTR_ERR(entry);
 921                goto failed;
 922        }
 923        swap = *entry;
 924
 925        if (swap.val) {
 926                /* Look it up and read it in.. */
 927                swappage = lookup_swap_cache(swap);
 928                if (!swappage) {
 929                        shmem_swp_unmap(entry);
 930                        spin_unlock(&info->lock);
 931                        /* here we actually do the io */
 932                        if (type && *type == VM_FAULT_MINOR) {
 933                                inc_page_state(pgmajfault);
 934                                *type = VM_FAULT_MAJOR;
 935                        }
 936                        swappage = shmem_swapin(info, swap, idx);
 937                        if (!swappage) {
                                    /*
                                     * Swap-in produced no page.  Re-read the
                                     * slot: if it still holds the same swap
                                     * value fail with -ENOMEM, otherwise it
                                     * changed meanwhile, so start over.
                                     */
 938                                spin_lock(&info->lock);
 939                                entry = shmem_swp_alloc(info, idx, sgp);
 940                                if (IS_ERR(entry))
 941                                        error = PTR_ERR(entry);
 942                                else {
 943                                        if (entry->val == swap.val)
 944                                                error = -ENOMEM;
 945                                        shmem_swp_unmap(entry);
 946                                }
 947                                spin_unlock(&info->lock);
 948                                if (error)
 949                                        goto failed;
 950                                goto repeat;
 951                        }
                            /*
                             * The page is now being read in: wait for it to be
                             * unlocked, drop our reference and retry, so the
                             * next pass finds it via lookup_swap_cache().
                             */
 952                        wait_on_page_locked(swappage);
 953                        page_cache_release(swappage);
 954                        goto repeat;
 955                }
 956
 957                /* We have to do this with page locked to prevent races */
 958                if (TestSetPageLocked(swappage)) {
 959                        shmem_swp_unmap(entry);
 960                        spin_unlock(&info->lock);
 961                        wait_on_page_locked(swappage);
 962                        page_cache_release(swappage);
 963                        goto repeat;
 964                }
 965                if (PageWriteback(swappage)) {
 966                        shmem_swp_unmap(entry);
 967                        spin_unlock(&info->lock);
 968                        wait_on_page_writeback(swappage);
 969                        unlock_page(swappage);
 970                        page_cache_release(swappage);
 971                        goto repeat;
 972                }
 973                if (!PageUptodate(swappage)) {
 974                        shmem_swp_unmap(entry);
 975                        spin_unlock(&info->lock);
 976                        unlock_page(swappage);
 977                        page_cache_release(swappage);
 978                        error = -EIO;
 979                        goto failed;
 980                }
 981
 982                if (filepage) {
                            /*
                             * A file page already exists (not uptodate):
                             * clear the swap slot, take swappage out of the
                             * swap cache and copy its contents into filepage,
                             * which becomes uptodate and dirty.
                             */
 983                        shmem_swp_set(info, entry, 0);
 984                        shmem_swp_unmap(entry);
 985                        delete_from_swap_cache(swappage);
 986                        spin_unlock(&info->lock);
 987                        copy_highpage(filepage, swappage);
 988                        unlock_page(swappage);
 989                        page_cache_release(swappage);
 990                        flush_dcache_page(filepage);
 991                        SetPageUptodate(filepage);
 992                        set_page_dirty(filepage);
 993                        swap_free(swap);
 994                } else if (!(error = move_from_swap_cache(
 995                                swappage, idx, mapping))) {
                            /*
                             * No file page yet: swappage itself was moved
                             * into the file's mapping at idx and becomes
                             * filepage.
                             */
 996                        info->flags |= SHMEM_PAGEIN;
 997                        shmem_swp_set(info, entry, 0);
 998                        shmem_swp_unmap(entry);
 999                        spin_unlock(&info->lock);
1000                        filepage = swappage;
1001                        swap_free(swap);
1002                } else {
                            /*
                             * move_from_swap_cache() failed: release
                             * swappage and retry from the top.
                             */
1003                        shmem_swp_unmap(entry);
1004                        spin_unlock(&info->lock);
1005                        unlock_page(swappage);
1006                        page_cache_release(swappage);
1007                        if (error == -ENOMEM) {
1008                                /* let kswapd refresh zone for GFP_ATOMICs */
1009                                blk_congestion_wait(WRITE, HZ/50);
1010                        }
1011                        goto repeat;
1012                }
1013        } else if (sgp == SGP_READ && !filepage) {
                    /*
                     * SGP_READ with no swap entry: only a page already in
                     * the page cache is used; nothing is allocated, so
                     * filepage may stay NULL on the way to "done".
                     */
1014                shmem_swp_unmap(entry);
1015                filepage = find_get_page(mapping, idx);
1016                if (filepage &&
1017                    (!PageUptodate(filepage) || TestSetPageLocked(filepage))) {
1018                        spin_unlock(&info->lock);
1019                        wait_on_page_locked(filepage);
1020                        page_cache_release(filepage);
1021                        filepage = NULL;
1022                        goto repeat;
1023                }
1024                spin_unlock(&info->lock);
1025        } else {
                    /*
                     * No swap entry: charge one block (against the
                     * superblock limits when sbinfo exists), then make sure
                     * there is a zeroed, uptodate file page.
                     */
1026                shmem_swp_unmap(entry);
1027                sbinfo = SHMEM_SB(inode->i_sb);
1028                if (sbinfo) {
1029                        spin_lock(&sbinfo->stat_lock);
1030                        if (sbinfo->free_blocks == 0 ||
1031                            shmem_acct_block(info->flags)) {
1032                                spin_unlock(&sbinfo->stat_lock);
1033                                spin_unlock(&info->lock);
1034                                error = -ENOSPC;
1035                                goto failed;
1036                        }
1037                        sbinfo->free_blocks--;
1038                        inode->i_blocks += BLOCKS_PER_PAGE;
1039                        spin_unlock(&sbinfo->stat_lock);
1040                } else if (shmem_acct_block(info->flags)) {
1041                        spin_unlock(&info->lock);
1042                        error = -ENOSPC;
1043                        goto failed;
1044                }
1045
1046                if (!filepage) {
                            /*
                             * info->lock is dropped around the allocation,
                             * so the swap slot is read again afterwards.
                             */
1047                        spin_unlock(&info->lock);
1048                        filepage = shmem_alloc_page(mapping_gfp_mask(mapping),
1049                                                    info,
1050                                                    idx);
1051                        if (!filepage) {
1052                                shmem_unacct_blocks(info->flags, 1);
1053                                shmem_free_block(inode);
1054                                error = -ENOMEM;
1055                                goto failed;
1056                        }
1057
1058                        spin_lock(&info->lock);
1059                        entry = shmem_swp_alloc(info, idx, sgp);
1060                        if (IS_ERR(entry))
1061                                error = PTR_ERR(entry);
1062                        else {
1063                                swap = *entry;
1064                                shmem_swp_unmap(entry);
1065                        }
                            /*
                             * Undo the accounting and drop the new page if
                             * the slot lookup failed, a swap entry appeared
                             * meanwhile, or the page could not be added to
                             * the page cache; then fail or retry.
                             */
1066                        if (error || swap.val || 0 != add_to_page_cache_lru(
1067                                        filepage, mapping, idx, GFP_ATOMIC)) {
1068                                spin_unlock(&info->lock);
1069                                page_cache_release(filepage);
1070                                shmem_unacct_blocks(info->flags, 1);
1071                                shmem_free_block(inode);
1072                                filepage = NULL;
1073                                if (error)
1074                                        goto failed;
1075                                goto repeat;
1076                        }
1077                        info->flags |= SHMEM_PAGEIN;
1078                }
1079
1080                info->alloced++;
1081                spin_unlock(&info->lock);
1082                clear_highpage(filepage);
1083                flush_dcache_page(filepage);
1084                SetPageUptodate(filepage);
1085        }
1086done:
            /*
             * *pagep != filepage means the page was obtained in this
             * function: hand it back unlocked.  A caller-supplied page is
             * left as it is.
             */
1087        if (*pagep != filepage) {
1088                unlock_page(filepage);
1089                *pagep = filepage;
1090        }
1091        return 0;
1092
1093failed:
            /*
             * Drop the lock and reference on a page picked up here.  On
             * every path to this label filepage is either NULL together
             * with *pagep, equal to a caller-supplied *pagep, or a page
             * obtained above, so this branch never sees a NULL filepage.
             */
1094        if (*pagep != filepage) {
1095                unlock_page(filepage);
1096                page_cache_release(filepage);
1097        }
1098        return error;
1099}
1100
1101struct page *shmem_nopage(struct vm_area_struct *vma, unsigned long address, int *type)
1102{
1103        struct inode *inode = vma->vm_file->f_dentry->d_inode;
1104        struct page *page = NULL;
1105        unsigned long idx;
1106        int error;
1107
1108        idx = (address - vma->vm_start) >> PAGE_SHIFT;
1109        idx += vma->vm_pgoff;
1110        idx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
1111        if (((loff_t) idx << PAGE_CACHE_SHIFT) >= i_size_read(inode))
1112                return NOPAGE_SIGBUS;
1113
1114        error = shmem_getpage(inode, idx, &page, SGP_CACHE, type);
1115        if (error)
1116                return (error == -ENOMEM)? NOPAGE_OOM: NOPAGE_SIGBUS;
1117
1118        mark_page_accessed(page);
1119        return page;
1120}
1121
1122static int shmem_populate(struct vm_area_struct *vma,
1123        unsigned long addr, unsigned long len,
1124        pgprot_t prot, unsigned long pgoff, int nonblock)
1125{
1126        struct inode *inode = vma->vm_file->f_dentry->d_inode;
1127        struct mm_struct *mm = vma->vm_mm;
1128        enum sgp_type sgp = nonblock? SGP_QUICK: SGP_CACHE;
1129        unsigned long size;
1130
1131        size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
1132        if (pgoff >= size || pgoff + (len >> PAGE_SHIFT) > size)
1133                return -EINVAL;
1134
1135        while ((long) len > 0) {
1136                struct page *page = NULL;
1137                int err;
1138                /*
1139                 * Will need changing if PAGE_CACHE_SIZE != PAGE_SIZE
1140                 */
1141                err = shmem_getpage(inode, pgoff, &page, sgp, NULL);
1142                if (err)
1143                        return err;
1144                if (page) {
1145                        mark_page_accessed(page);
1146                        err = install_page(mm, vma, addr, page, prot);
1147                        if (err) {
1148                                page_cache_release(page);
1149                                return err;
1150                        }
1151                } else if (nonblock) {
1152                        err = install_file_pte(mm, vma, addr, pgoff, prot);
1153                        if (err)
1154                                return err;
1155                }
1156
1157                len -= PAGE_SIZE;
1158                addr += PAGE_SIZE;
1159                pgoff++;
1160        }
1161        return 0;
1162}
1163
1164#ifdef CONFIG_NUMA
1165int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
1166{
1167        struct inode *i = vma->vm_file->f_dentry->d_inode;
1168        return mpol_set_shared_policy(&SHMEM_I(i)->policy, vma, new);
1169}
1170
1171struct mempolicy *
1172shmem_get_policy(struct vm_area_struct *vma, unsigned long addr)
1173{
1174        struct inode *i = vma->vm_file->f_dentry->d_inode;
1175        unsigned long idx;
1176
1177        idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
1178        return mpol_shared_policy_lookup(&SHMEM_I(i)->policy, idx);
1179}
1180#endif
1181
1182int shmem_lock(struct file *file, int lock, struct user_struct *user)
1183{
1184        struct inode *inode = file->f_dentry->d_inode;
1185        struct shmem_inode_info *info = SHMEM_I(inode);
1186        int retval = -ENOMEM;
1187
1188        spin_lock(&info->lock);
1189        if (lock && !(info->flags & VM_LOCKED)) {
1190                if (!user_shm_lock(inode->i_size, user))
1191                        goto out_nomem;
1192                info->flags |= VM_LOCKED;
1193        }
1194        if (!lock && (info->flags & VM_LOCKED) && user) {
1195                user_shm_unlock(inode->i_size, user);
1196                info->flags &= ~VM_LOCKED;
1197        }
1198        retval = 0;
1199out_nomem:
1200        spin_unlock(&info->lock);
1201        return retval;
1202}
1203
1204static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
1205{
1206        file_accessed(file);
1207        vma->vm_ops = &shmem_vm_ops;
1208        return 0;
1209}
1210
1211static struct inode *
1212shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
1213{
1214        struct inode *inode;
1215        struct shmem_inode_info *info;
1216        struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
1217
1218        if (sbinfo) {
1219                spin_lock(&sbinfo->stat_lock);
1220                if (!sbinfo->free_inodes) {
1221                        spin_unlock(&sbinfo->stat_lock);
1222                        return NULL;
1223                }
1224                sbinfo->free_inodes--;
1225                spin_unlock(&sbinfo->stat_lock);
1226        }
1227
1228        inode = new_inode(sb);
1229        if (inode) {
1230                inode->i_mode = mode;
1231                inode->i_uid = current->fsuid;
1232                inode->i_gid = current->fsgid;
1233                inode->i_blksize = PAGE_CACHE_SIZE;
1234                inode->i_blocks = 0;
1235                inode->i_mapping->a_ops = &shmem_aops;
1236                inode->i_mapping->backing_dev_info = &shmem_backing_dev_info;
1237                inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
1238                info = SHMEM_I(inode);
1239                memset(info, 0, (char *)inode - (char *)info);
1240                spin_lock_init(&info->lock);
1241                INIT_LIST_HEAD(&info->swaplist);
1242
1243                switch (mode & S_IFMT) {
1244                default:
1245                        inode->i_op = &shmem_special_inode_operations;
1246                        init_special_inode(inode, mode, dev);
1247                        break;
1248                case S_IFREG:
1249                        inode->i_op = &shmem_inode_operations;
1250                        inode->i_fop = &shmem_file_operations;
1251                        mpol_shared_policy_init(&info->policy);
1252                        break;
1253                case S_IFDIR:
1254                        inode->i_nlink++;
1255                        /* Some things misbehave if size == 0 on a directory */
1256                        inode->i_size = 2 * BOGO_DIRENT_SIZE;
1257                        inode->i_op = &shmem_dir_inode_operations;
1258                        inode->i_fop = &simple_dir_operations;
1259                        break;
1260                case S_IFLNK:
1261                        /*
1262                         * Must not load anything in the rbtree,
1263                         * mpol_free_shared_policy will not be called.
1264                         */
1265                        mpol_shared_policy_init(&info->policy);
1266                        break;
1267                }
1268        } else if (sbinfo) {
1269                spin_lock(&sbinfo->stat_lock);
1270                sbinfo->free_inodes++;
1271                spin_unlock(&sbinfo->stat_lock);
1272        }
1273        return inode;
1274}
1275
1276#ifdef CONFIG_TMPFS
1277
1278static int shmem_set_size(struct shmem_sb_info *sbinfo,
1279                          unsigned long max_blocks, unsigned long max_inodes)
1280{
1281        int error;
1282        unsigned long blocks, inodes;
1283
1284        spin_lock(&sbinfo->stat_lock);
1285        blocks = sbinfo->max_blocks - sbinfo->free_blocks;
1286        inodes = sbinfo->max_inodes - sbinfo->free_inodes;
1287        error = -EINVAL;
1288        if (max_blocks < blocks)
1289                goto out;
1290        if (max_inodes < inodes)
1291                goto out;
1292        error = 0;
1293        sbinfo->max_blocks  = max_blocks;
1294        sbinfo->free_blocks = max_blocks - blocks;
1295        sbinfo->max_inodes  = max_inodes;
1296        sbinfo->free_inodes = max_inodes - inodes;
1297out:
1298        spin_unlock(&sbinfo->stat_lock);
1299        return error;
1300}
1301
1302static struct inode_operations shmem_symlink_inode_operations;    /* declared without initializer here; presumably filled in further down the file */
1303static struct inode_operations shmem_symlink_inline_operations;   /* declared without initializer here; presumably filled in further down the file */
1304
1305/*
1306 * Normally tmpfs makes no use of shmem_prepare_write, but it
1307 * lets a tmpfs file be used read-write below the loop driver.
1308 */
1309static int
1310shmem_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
1311{
1312        struct inode *inode = page->mapping->host;
1313        return shmem_getpage(inode, page->index, &page, SGP_WRITE, NULL);
1314}
1315
1316static ssize_t
1317shmem_file_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
1318{
1319        struct inode    *inode = file->f_dentry->d_inode;
1320        loff_t          pos;
1321        unsigned long   written;
1322        ssize_t         err;
1323
1324        if ((ssize_t) count < 0)
1325                return -EINVAL;
1326
1327        if (!access_ok(VERIFY_READ, buf, count))
1328                return -EFAULT;
1329
1330        down(&inode->i_sem);
1331
1332        pos = *ppos;
1333        written = 0;
1334
1335        err = generic_write_checks(file, &pos, &count, 0);
1336        if (err || !count)
1337                goto out;
1338
1339        err = remove_suid(file->f_dentry);
1340        if (err)
1341                goto out;
1342
1343        inode->i_ctime = inode->i_mtime = CURRENT_TIME;
1344
1345        do {
1346                struct page *page = NULL;
1347                unsigned long bytes, index, offset;
1348                char *kaddr;
1349                int left;
1350
1351                offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
1352                index = pos >> PAGE_CACHE_SHIFT;
1353                bytes = PAGE_CACHE_SIZE - offset;
1354                if (bytes > count)
1355                        bytes = count;
1356
1357                /*
1358                 * We don't hold page lock across copy from user -
1359                 * what would it guard against? - so no deadlock here.
1360                 * But it still may be a good idea to prefault below.
1361                 */
1362
1363                err = shmem_getpage(inode, index, &page, SGP_WRITE, NULL);
1364                if (err)
1365                        break;
1366
1367                left = bytes;
1368                if (PageHighMem(page)) {
1369                        volatile unsigned char dummy;
1370                        __get_user(dummy, buf);
1371                        __get_user(dummy, buf + bytes - 1);
1372
1373                        kaddr = kmap_atomic(page, KM_USER0);
1374                        left = __copy_from_user_inatomic(kaddr + offset,
1375                                                        buf, bytes);
1376                        kunmap_atomic(kaddr, KM_USER0);
1377                }
1378                if (left) {
1379                        kaddr = kmap(page);
1380                        left = __copy_from_user(kaddr + offset, buf, bytes);
1381                        kunmap(page);
1382                }
1383
1384                written += bytes;
1385                count -= bytes;
1386                pos += bytes;
1387                buf += bytes;
1388                if (pos > inode->i_size)
1389                        i_size_write(inode, pos);
1390
1391                flush_dcache_page(page);
1392                set_page_dirty(page);
1393                mark_page_accessed(page);
1394                page_cache_release(page);
1395
1396                if (left) {
1397                        pos -= left;
1398                        written -= left;
1399                        err = -EFAULT;
1400                        break;
1401                }
1402
1403                /*
1404                 * Our dirty pages are not counted in nr_dirty,
1405                 * and we do not attempt to balance dirty pages.
1406                 */
1407
1408                cond_resched();
1409        } while (count);
1410
1411        *ppos = pos;
1412        if (written)
1413                err = written;
1414out:
1415        up(&inode->i_sem);
1416        return err;
1417}
1418
1419static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc, read_actor_t actor)
1420{
1421        struct inode *inode = filp->f_dentry->d_inode;
1422        struct address_space *mapping = inode->i_mapping;
1423        unsigned long index, offset;
1424
1425        index = *ppos >> PAGE_CACHE_SHIFT;
1426        offset = *ppos & ~PAGE_CACHE_MASK;
1427
1428        for (;;) {
1429                struct page *page = NULL;
1430                unsigned long end_index, nr, ret;
1431                loff_t i_size = i_size_read(inode);
1432
1433                end_index = i_size >> PAGE_CACHE_SHIFT;
1434                if (index > end_index)
1435                        break;
1436                if (index == end_index) {
1437                        nr = i_size & ~PAGE_CACHE_MASK;
1438                        if (nr <= offset)
1439                                break;
1440                }
1441
1442                desc->error = shmem_getpage(inode, index, &page, SGP_READ, NULL);
1443                if (desc->error) {
1444                        if (desc->error == -EINVAL)
1445                                desc->error = 0;
1446                        break;
1447                }
1448
1449                /*
1450                 * We must evaluate after, since reads (unlike writes)
1451                 * are called without i_sem protection against truncate
1452                 */
1453                nr = PAGE_CACHE_SIZE;
1454                i_size = i_size_read(inode);
1455                end_index = i_size >> PAGE_CACHE_SHIFT;
1456                if (index == end_index) {
1457                        nr = i_size & ~PAGE_CACHE_MASK;
1458                        if (nr <= offset) {
1459                                if (page)
1460                                        page_cache_release(page);
1461                                break;
1462                        }
1463                }
1464                nr -= offset;
1465
1466                if (page) {
1467                        /*
1468                         * If users can be writing to this page using arbitrary
1469                         * virtual addresses, take care about potential aliasing
1470                         * before reading the page on the kernel side.
1471                         */
1472                        if (mapping_writably_mapped(mapping))
1473                                flush_dcache_page(page);
1474                        /*
1475                         * Mark the page accessed if we read the beginning.
1476                         */
1477                        if (!offset)
1478                                mark_page_accessed(page);
1479                } else
1480                        page = ZERO_PAGE(0);
1481
1482                /*
1483                 * Ok, we have the page, and it's up-to-date, so
1484                 * now we can copy it to user space...
1485                 *
1486                 * The actor routine returns how many bytes were actually used..
1487                 * NOTE! This may not be the same as how much of a user buffer
1488                 * we filled up (we may be padding etc), so we can only update
1489                 * "pos" here (the actor routine has to update the user buffer
1490                 * pointers and the remaining count).
1491                 */
1492                ret = actor(desc, page, offset, nr);
1493                offset += ret;
1494                index += offset >> PAGE_CACHE_SHIFT;
1495                offset &= ~PAGE_CACHE_MASK;
1496
1497                page_cache_release(page);
1498                if (ret != nr || !desc->count)
1499                        break;
1500
1501                cond_resched();
1502        }
1503
1504        *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
1505        file_accessed(filp);
1506}
1507
1508static ssize_t shmem_file_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
1509{
1510        read_descriptor_t desc;
1511
1512        if ((ssize_t) count < 0)
1513                return -EINVAL;
1514        if (!access_ok(VERIFY_WRITE, buf, count))
1515                return -EFAULT;
1516        if (!count)
1517                return 0;
1518
1519        desc.written = 0;
1520        desc.count = count;
1521        desc.arg.buf = buf;
1522        desc.error = 0;
1523
1524        do_shmem_file_read(filp, ppos, &desc, file_read_actor);
1525        if (desc.written)
1526                return desc.written;
1527        return desc.error;
1528}
1529
1530static ssize_t shmem_file_sendfile(struct file *in_file, loff_t *ppos,
1531                         size_t count, read_actor_t actor, void *target)
1532{
1533        read_descriptor_t desc;
1534
1535        if (!count)
1536                return 0;
1537
1538        desc.written = 0;
1539        desc.count = count;
1540        desc.arg.data = target;
1541        desc.error = 0;
1542
1543        do_shmem_file_read(in_file, ppos, &desc, actor);
1544        if (desc.written)
1545                return desc.written;
1546        return desc.error;
1547}
1548
1549static int shmem_statfs(struct super_block *sb, struct kstatfs *buf)
1550{
1551        struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
1552
1553        buf->f_type = TMPFS_MAGIC;
1554        buf->f_bsize = PAGE_CACHE_SIZE;
1555        buf->f_namelen = NAME_MAX;
1556        if (sbinfo) {
1557                spin_lock(&sbinfo->stat_lock);
1558                buf->f_blocks = sbinfo->max_blocks;
1559                buf->f_bavail = buf->f_bfree = sbinfo->free_blocks;
1560                buf->f_files = sbinfo->max_inodes;
1561                buf->f_ffree = sbinfo->free_inodes;
1562                spin_unlock(&sbinfo->stat_lock);
1563        }
1564        /* else leave those fields 0 like simple_statfs */
1565        return 0;
1566}
1567
1568/*
1569 * File creation. Allocate an inode, and we're done..
1570 */
1571static int
1572shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
1573{
1574        struct inode *inode = shmem_get_inode(dir->i_sb, mode, dev);
1575        int error = -ENOSPC;
1576
1577        if (inode) {
1578                if (dir->i_mode & S_ISGID) {
1579                        inode->i_gid = dir->i_gid;
1580                        if (S_ISDIR(mode))
1581                                inode->i_mode |= S_ISGID;
1582                }
1583                dir->i_size += BOGO_DIRENT_SIZE;
1584                dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1585                d_instantiate(dentry, inode);
1586                dget(dentry); /* Extra count - pin the dentry in core */
1587                error = 0;
1588        }
1589        return error;
1590}
1591
1592static int shmem_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1593{
1594        int error;
1595
1596        if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0)))
1597                return error;
1598        dir->i_nlink++;
1599        return 0;
1600}
1601
1602static int shmem_create(struct inode *dir, struct dentry *dentry, int mode,
1603                struct nameidata *nd)
1604{
1605        return shmem_mknod(dir, dentry, mode | S_IFREG, 0);
1606}
1607
1608/*
1609 * Link a file..
1610 */
1611static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
1612{
1613        struct inode *inode = old_dentry->d_inode;
1614        struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
1615
1616        /*
1617         * No ordinary (disk based) filesystem counts links as inodes;
1618         * but each new link needs a new dentry, pinning lowmem, and
1619         * tmpfs dentries cannot be pruned until they are unlinked.
1620         */
1621        if (sbinfo) {
1622                spin_lock(&sbinfo->stat_lock);
1623                if (!sbinfo->free_inodes) {
1624                        spin_unlock(&sbinfo->stat_lock);
1625                        return -ENOSPC;
1626                }
1627                sbinfo->free_inodes--;
1628                spin_unlock(&sbinfo->stat_lock);
1629        }
1630
1631        dir->i_size += BOGO_DIRENT_SIZE;
1632        inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1633        inode->i_nlink++;
1634        atomic_inc(&inode->i_count);    /* New dentry reference */
1635        dget(dentry);           /* Extra pinning count for the created dentry */
1636        d_instantiate(dentry, inode);
1637        return 0;
1638}
1639
1640static int shmem_unlink(struct inode *dir, struct dentry *dentry)
1641{
1642        struct inode *inode = dentry->d_inode;
1643
1644        if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode)) {
1645                struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
1646                if (sbinfo) {
1647                        spin_lock(&sbinfo->stat_lock);
1648                        sbinfo->free_inodes++;
1649                        spin_unlock(&sbinfo->stat_lock);
1650                }
1651        }
1652
1653        dir->i_size -= BOGO_DIRENT_SIZE;
1654        inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1655        inode->i_nlink--;
1656        dput(dentry);   /* Undo the count from "create" - this does all the work */
1657        return 0;
1658}
1659
1660static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
1661{
1662        if (!simple_empty(dentry))
1663                return -ENOTEMPTY;
1664
1665        dir->i_nlink--;
1666        return shmem_unlink(dir, dentry);
1667}
1668
1669/*
1670 * The VFS layer already does all the dentry stuff for rename,
1671 * we just have to decrement the usage count for the target if
1672 * it exists so that the VFS layer correctly free's it when it
1673 * gets overwritten.
1674 */
1675static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry)
1676{
1677        struct inode *inode = old_dentry->d_inode;
1678        int they_are_dirs = S_ISDIR(inode->i_mode);
1679
1680        if (!simple_empty(new_dentry))
1681                return -ENOTEMPTY;
1682
1683        if (new_dentry->d_inode) {
1684                (void) shmem_unlink(new_dir, new_dentry);
1685                if (they_are_dirs)
1686                        old_dir->i_nlink--;
1687        } else if (they_are_dirs) {
1688                old_dir->i_nlink--;
1689                new_dir->i_nlink++;
1690        }
1691
1692        old_dir->i_size -= BOGO_DIRENT_SIZE;
1693        new_dir->i_size += BOGO_DIRENT_SIZE;
1694        old_dir->i_ctime = old_dir->i_mtime =
1695        new_dir->i_ctime = new_dir->i_mtime =
1696        inode->i_ctime = CURRENT_TIME;
1697        return 0;
1698}
1699
1700static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
1701{
1702        int error;
1703        int len;
1704        struct inode *inode;
1705        struct page *page = NULL;
1706        char *kaddr;
1707        struct shmem_inode_info *info;
1708
1709        len = strlen(symname) + 1;
1710        if (len > PAGE_CACHE_SIZE)
1711                return -ENAMETOOLONG;
1712
1713        inode = shmem_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0);
1714        if (!inode)
1715                return -ENOSPC;
1716
1717        info = SHMEM_I(inode);
1718        inode->i_size = len-1;
1719        if (len <= (char *)inode - (char *)info) {
1720                /* do it inline */
1721                memcpy(info, symname, len);
1722                inode->i_op = &shmem_symlink_inline_operations;
1723        } else {
1724                error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
1725                if (error) {
1726                        iput(inode);
1727                        return error;
1728                }
1729                inode->i_op = &shmem_symlink_inode_operations;
1730                kaddr = kmap_atomic(page, KM_USER0);
1731                memcpy(kaddr, symname, len);
1732                kunmap_atomic(kaddr, KM_USER0);
1733                set_page_dirty(page);
1734                page_cache_release(page);
1735        }
1736        if (dir->i_mode & S_ISGID)
1737                inode->i_gid = dir->i_gid;
1738        dir->i_size += BOGO_DIRENT_SIZE;
1739        dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1740        d_instantiate(dentry, inode);
1741        dget(dentry);
1742        return 0;
1743}
1744
1745static int shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd)
1746{
1747        nd_set_link(nd, (char *)SHMEM_I(dentry->d_inode));
1748        return 0;
1749}
1750
1751static int shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
1752{
1753        struct page *page = NULL;
1754        int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL);
1755        nd_set_link(nd, res ? ERR_PTR(res) : kmap(page));
1756        return 0;
1757}
1758
1759static void shmem_put_link(struct dentry *dentry, struct nameidata *nd)
1760{
1761        if (!IS_ERR(nd_get_link(nd))) {
1762                struct page *page;
1763
1764                page = find_get_page(dentry->d_inode->i_mapping, 0);
1765                if (!page)
1766                        BUG();
1767                kunmap(page);
1768                mark_page_accessed(page);
1769                page_cache_release(page);
1770                page_cache_release(page);
1771        }
1772}
1773
1774static struct inode_operations shmem_symlink_inline_operations = {
1775        .readlink       = generic_readlink,
1776        .follow_link    = shmem_follow_link_inline,
1777#ifdef CONFIG_TMPFS_XATTR
1778        .setxattr       = generic_setxattr,
1779        .getxattr       = generic_getxattr,
1780        .listxattr      = generic_listxattr,
1781        .removexattr    = generic_removexattr,
1782#endif
1783};
1784
1785static struct inode_operations shmem_symlink_inode_operations = {
1786        .truncate       = shmem_truncate,
1787        .readlink       = generic_readlink,
1788        .follow_link    = shmem_follow_link,
1789        .put_link       = shmem_put_link,
1790#ifdef CONFIG_TMPFS_XATTR
1791        .setxattr       = generic_setxattr,
1792        .getxattr       = generic_getxattr,
1793        .listxattr      = generic_listxattr,
1794        .removexattr    = generic_removexattr,
1795#endif
1796};
1797
1798static int shmem_parse_options(char *options, int *mode, uid_t *uid, gid_t *gid, unsigned long *blocks, unsigned long *inodes)
1799{
1800        char *this_char, *value, *rest;
1801
1802        while ((this_char = strsep(&options, ",")) != NULL) {
1803                if (!*this_char)
1804                        continue;
1805                if ((value = strchr(this_char,'=')) != NULL) {
1806                        *value++ = 0;
1807                } else {
1808                        printk(KERN_ERR
1809                            "tmpfs: No value for mount option '%s'\n",
1810                            this_char);
1811                        return 1;
1812                }
1813
1814                if (!strcmp(this_char,"size")) {
1815                        unsigned long long size;
1816                        size = memparse(value,&rest);
1817                        if (*rest == '%') {
1818                                size <<= PAGE_SHIFT;
1819                                size *= totalram_pages;
1820                                do_div(size, 100);
1821                                rest++;
1822                        }
1823                        if (*rest)
1824                                goto bad_val;
1825                        *blocks = size >> PAGE_CACHE_SHIFT;
1826                } else if (!strcmp(this_char,"nr_blocks")) {
1827                        *blocks = memparse(value,&rest);
1828                        if (*rest)
1829                                goto bad_val;
1830                } else if (!strcmp(this_char,"nr_inodes")) {
1831                        *inodes = memparse(value,&rest);
1832                        if (*rest)
1833                                goto bad_val;
1834                } else if (!strcmp(this_char,"mode")) {
1835                        if (!mode)
1836                                continue;
1837                        *mode = simple_strtoul(value,&rest,8);
1838                        if (*rest)
1839                                goto bad_val;
1840                } else if (!strcmp(this_char,"uid")) {
1841                        if (!uid)
1842                                continue;
1843                        *uid = simple_strtoul(value,&rest,0);
1844                        if (*rest)
1845                                goto bad_val;
1846                } else if (!strcmp(this_char,"gid")) {
1847                        if (!gid)
1848                                continue;
1849                        *gid = simple_strtoul(value,&rest,0);
1850                        if (*rest)
1851                                goto bad_val;
1852                } else {
1853                        printk(KERN_ERR "tmpfs: Bad mount option %s\n",
1854                               this_char);
1855                        return 1;
1856                }
1857        }
1858        return 0;
1859
1860bad_val:
1861        printk(KERN_ERR "tmpfs: Bad value '%s' for mount option '%s'\n",
1862               value, this_char);
1863        return 1;
1864
1865}
1866
1867static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
1868{
1869        struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
1870        unsigned long max_blocks = 0;
1871        unsigned long max_inodes = 0;
1872
1873        if (sbinfo) {
1874                max_blocks = sbinfo->max_blocks;
1875                max_inodes = sbinfo->max_inodes;
1876        }
1877        if (shmem_parse_options(data, NULL, NULL, NULL, &max_blocks, &max_inodes))
1878                return -EINVAL;
1879        /* Keep it simple: disallow limited <-> unlimited remount */
1880        if ((max_blocks || max_inodes) == !sbinfo)
1881                return -EINVAL;
1882        /* But allow the pointless unlimited -> unlimited remount */
1883        if (!sbinfo)
1884                return 0;
1885        return shmem_set_size(sbinfo, max_blocks, max_inodes);
1886}
1887#endif
1888
1889static void shmem_put_super(struct super_block *sb)
1890{
1891        kfree(sb->s_fs_info);
1892        sb->s_fs_info = NULL;
1893}
1894
/*
 * shmem_fill_super() stores shmem_xattr_handlers in sb->s_xattr.
 * Without CONFIG_TMPFS_XATTR the name is merely an alias for NULL;
 * with it, this forward-declares the handler table defined further down.
 */
#ifndef CONFIG_TMPFS_XATTR
#define shmem_xattr_handlers NULL
#else
static struct xattr_handler *shmem_xattr_handlers[];
#endif
1900
1901static int shmem_fill_super(struct super_block *sb,
1902                            void *data, int silent)
1903{
1904        struct inode *inode;
1905        struct dentry *root;
1906        int mode   = S_IRWXUGO | S_ISVTX;
1907        uid_t uid = current->fsuid;
1908        gid_t gid = current->fsgid;
1909        int err = -ENOMEM;
1910
1911#ifdef CONFIG_TMPFS
1912        unsigned long blocks = 0;
1913        unsigned long inodes = 0;
1914
1915        /*
1916         * Per default we only allow half of the physical ram per
1917         * tmpfs instance, limiting inodes to one per page of lowmem;
1918         * but the internal instance is left unlimited.
1919         */
1920        if (!(sb->s_flags & MS_NOUSER)) {
1921                blocks = totalram_pages / 2;
1922                inodes = totalram_pages - totalhigh_pages;
1923                if (inodes > blocks)
1924                        inodes = blocks;
1925
1926                if (shmem_parse_options(data, &mode,
1927                                        &uid, &gid, &blocks, &inodes))
1928                        return -EINVAL;
1929        }
1930
1931        if (blocks || inodes) {
1932                struct shmem_sb_info *sbinfo;
1933                sbinfo = kmalloc(sizeof(struct shmem_sb_info), GFP_KERNEL);
1934                if (!sbinfo)
1935                        return -ENOMEM;
1936                sb->s_fs_info = sbinfo;
1937                spin_lock_init(&sbinfo->stat_lock);
1938                sbinfo->max_blocks = blocks;
1939                sbinfo->free_blocks = blocks;
1940                sbinfo->max_inodes = inodes;
1941                sbinfo->free_inodes = inodes;
1942        }
1943        sb->s_xattr = shmem_xattr_handlers;
1944#endif
1945
1946        sb->s_maxbytes = SHMEM_MAX_BYTES;
1947        sb->s_blocksize = PAGE_CACHE_SIZE;
1948        sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
1949        sb->s_magic = TMPFS_MAGIC;
1950        sb->s_op = &shmem_ops;
1951        inode = shmem_get_inode(sb, S_IFDIR | mode, 0);
1952        if (!inode)
1953                goto failed;
1954        inode->i_uid = uid;
1955        inode->i_gid = gid;
1956        root = d_alloc_root(inode);
1957        if (!root)
1958                goto failed_iput;
1959        sb->s_root = root;
1960        return 0;
1961
1962failed_iput:
1963        iput(inode);
1964failed:
1965        shmem_put_super(sb);
1966        return err;
1967}
1968
1969static kmem_cache_t *shmem_inode_cachep;
1970
1971static struct inode *shmem_alloc_inode(struct super_block *sb)
1972{
1973        struct shmem_inode_info *p;
1974        p = (struct shmem_inode_info *)kmem_cache_alloc(shmem_inode_cachep, SLAB_KERNEL);
1975        if (!p)
1976                return NULL;
1977        return &p->vfs_inode;
1978}
1979
1980static void shmem_destroy_inode(struct inode *inode)
1981{
1982        if ((inode->i_mode & S_IFMT) == S_IFREG ||
1983            inode->i_op == &shmem_symlink_inode_operations) {
1984                /* only struct inode is valid if it's an inline symlink */
1985                mpol_free_shared_policy(&SHMEM_I(inode)->policy);
1986        }
1987        kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
1988}
1989
1990static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
1991{
1992        struct shmem_inode_info *p = (struct shmem_inode_info *) foo;
1993
1994        if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
1995            SLAB_CTOR_CONSTRUCTOR) {
1996                inode_init_once(&p->vfs_inode);
1997        }
1998}
1999
2000static int init_inodecache(void)
2001{
2002        shmem_inode_cachep = kmem_cache_create("shmem_inode_cache",
2003                                sizeof(struct shmem_inode_info),
2004                                0, 0, init_once, NULL);
2005        if (shmem_inode_cachep == NULL)
2006                return -ENOMEM;
2007        return 0;
2008}
2009
2010static void destroy_inodecache(void)
2011{
2012        if (kmem_cache_destroy(shmem_inode_cachep))
2013                printk(KERN_INFO "shmem_inode_cache: not all structures were freed\n");
2014}
2015
2016static struct address_space_operations shmem_aops = {
2017        .writepage      = shmem_writepage,
2018        .set_page_dirty = __set_page_dirty_nobuffers,
2019#ifdef CONFIG_TMPFS
2020        .prepare_write  = shmem_prepare_write,
2021        .commit_write   = simple_commit_write,
2022#endif
2023};
2024
2025static struct file_operations shmem_file_operations = {
2026        .mmap           = shmem_mmap,
2027#ifdef CONFIG_TMPFS
2028        .llseek         = generic_file_llseek,
2029        .read           = shmem_file_read,
2030        .write          = shmem_file_write,
2031        .fsync          = simple_sync_file,
2032        .sendfile       = shmem_file_sendfile,
2033#endif
2034};
2035
2036static struct inode_operations shmem_inode_operations = {
2037        .truncate       = shmem_truncate,
2038        .setattr        = shmem_notify_change,
2039#ifdef CONFIG_TMPFS_XATTR
2040        .setxattr       = generic_setxattr,
2041        .getxattr       = generic_getxattr,
2042        .listxattr      = generic_listxattr,
2043        .removexattr    = generic_removexattr,
2044#endif
2045};
2046
2047static struct inode_operations shmem_dir_inode_operations = {
2048#ifdef CONFIG_TMPFS
2049        .create         = shmem_create,
2050        .lookup         = simple_lookup,
2051        .link           = shmem_link,
2052        .unlink         = shmem_unlink,
2053        .symlink        = shmem_symlink,
2054        .mkdir          = shmem_mkdir,
2055        .rmdir          = shmem_rmdir,
2056        .mknod          = shmem_mknod,
2057        .rename         = shmem_rename,
2058#ifdef CONFIG_TMPFS_XATTR
2059        .setxattr       = generic_setxattr,
2060        .getxattr       = generic_getxattr,
2061        .listxattr      = generic_listxattr,
2062        .removexattr    = generic_removexattr,
2063#endif
2064#endif
2065};
2066
2067static struct inode_operations shmem_special_inode_operations = {
2068#ifdef CONFIG_TMPFS_XATTR
2069        .setxattr       = generic_setxattr,
2070        .getxattr       = generic_getxattr,
2071        .listxattr      = generic_listxattr,
2072        .removexattr    = generic_removexattr,
2073#endif
2074};
2075
2076static struct super_operations shmem_ops = {
2077        .alloc_inode    = shmem_alloc_inode,
2078        .destroy_inode  = shmem_destroy_inode,
2079#ifdef CONFIG_TMPFS
2080        .statfs         = shmem_statfs,
2081        .remount_fs     = shmem_remount_fs,
2082#endif
2083        .delete_inode   = shmem_delete_inode,
2084        .drop_inode     = generic_delete_inode,
2085        .put_super      = shmem_put_super,
2086};
2087
2088static struct vm_operations_struct shmem_vm_ops = {
2089        .nopage         = shmem_nopage,
2090        .populate       = shmem_populate,
2091#ifdef CONFIG_NUMA
2092        .set_policy     = shmem_set_policy,
2093        .get_policy     = shmem_get_policy,
2094#endif
2095};
2096
2097
2098#ifdef CONFIG_TMPFS_SECURITY
2099
2100static size_t shmem_xattr_security_list(struct inode *inode, char *list, size_t list_len,
2101                                        const char *name, size_t name_len)
2102{
2103        return security_inode_listsecurity(inode, list, list_len);
2104}
2105
2106static int shmem_xattr_security_get(struct inode *inode, const char *name, void *buffer, size_t size)
2107{
2108        if (strcmp(name, "") == 0)
2109                return -EINVAL;
2110        return security_inode_getsecurity(inode, name, buffer, size);
2111}
2112
2113static int shmem_xattr_security_set(struct inode *inode, const char *name, const void *value, size_t size, int flags)
2114{
2115        if (strcmp(name, "") == 0)
2116                return -EINVAL;
2117        return security_inode_setsecurity(inode, name, value, size, flags);
2118}
2119
2120struct xattr_handler shmem_xattr_security_handler = {
2121        .prefix = XATTR_SECURITY_PREFIX,
2122        .list   = shmem_xattr_security_list,
2123        .get    = shmem_xattr_security_get,
2124        .set    = shmem_xattr_security_set,
2125};
2126
2127#endif  /* CONFIG_TMPFS_SECURITY */
2128
2129#ifdef CONFIG_TMPFS_XATTR
2130
2131static struct xattr_handler *shmem_xattr_handlers[] = {
2132#ifdef CONFIG_TMPFS_SECURITY
2133        &shmem_xattr_security_handler,
2134#endif
2135        NULL
2136};
2137
2138#endif  /* CONFIG_TMPFS_XATTR */
2139
2140static struct super_block *shmem_get_sb(struct file_system_type *fs_type,
2141        int flags, const char *dev_name, void *data)
2142{
2143        return get_sb_nodev(fs_type, flags, data, shmem_fill_super);
2144}
2145
2146static struct file_system_type tmpfs_fs_type = {
2147        .owner          = THIS_MODULE,
2148        .name           = "tmpfs",
2149        .get_sb         = shmem_get_sb,
2150        .kill_sb        = kill_litter_super,
2151};
2152static struct vfsmount *shm_mnt;
2153
2154static int __init init_tmpfs(void)
2155{
2156        int error;
2157
2158        error = init_inodecache();
2159        if (error)
2160                goto out3;
2161
2162        error = register_filesystem(&tmpfs_fs_type);
2163        if (error) {
2164                printk(KERN_ERR "Could not register tmpfs\n");
2165                goto out2;
2166        }
2167#ifdef CONFIG_TMPFS
2168        devfs_mk_dir("shm");
2169#endif
2170        shm_mnt = do_kern_mount(tmpfs_fs_type.name, MS_NOUSER,
2171                                tmpfs_fs_type.name, NULL);
2172        if (IS_ERR(shm_mnt)) {
2173                error = PTR_ERR(shm_mnt);
2174                printk(KERN_ERR "Could not kern_mount tmpfs\n");
2175                goto out1;
2176        }
2177        return 0;
2178
2179out1:
2180        unregister_filesystem(&tmpfs_fs_type);
2181out2:
2182        destroy_inodecache();
2183out3:
2184        shm_mnt = ERR_PTR(error);
2185        return error;
2186}
2187module_init(init_tmpfs)
2188
2189/*
2190 * shmem_file_setup - get an unlinked file living in tmpfs
2191 *
2192 * @name: name for dentry (to be seen in /proc/<pid>/maps
2193 * @size: size to be set for the file
2194 *
2195 */
2196struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags)
2197{
2198        int error;
2199        struct file *file;
2200        struct inode *inode;
2201        struct dentry *dentry, *root;
2202        struct qstr this;
2203
2204        if (IS_ERR(shm_mnt))
2205                return (void *)shm_mnt;
2206
2207        if (size < 0 || size > SHMEM_MAX_BYTES)
2208                return ERR_PTR(-EINVAL);
2209
2210        if (shmem_acct_size(flags, size))
2211                return ERR_PTR(-ENOMEM);
2212
2213        error = -ENOMEM;
2214        this.name = name;
2215        this.len = strlen(name);
2216        this.hash = 0; /* will go */
2217        root = shm_mnt->mnt_root;
2218        dentry = d_alloc(root, &this);
2219        if (!dentry)
2220                goto put_memory;
2221
2222        error = -ENFILE;
2223        file = get_empty_filp();
2224        if (!file)
2225                goto put_dentry;
2226
2227        error = -ENOSPC;
2228        inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0);
2229        if (!inode)
2230                goto close_file;
2231
2232        SHMEM_I(inode)->flags = flags & VM_ACCOUNT;
2233        d_instantiate(dentry, inode);
2234        inode->i_size = size;
2235        inode->i_nlink = 0;     /* It is unlinked */
2236        file->f_vfsmnt = mntget(shm_mnt);
2237        file->f_dentry = dentry;
2238        file->f_mapping = inode->i_mapping;
2239        file->f_op = &shmem_file_operations;
2240        file->f_mode = FMODE_WRITE | FMODE_READ;
2241        return file;
2242
2243close_file:
2244        put_filp(file);
2245put_dentry:
2246        dput(dentry);
2247put_memory:
2248        shmem_unacct_size(flags, size);
2249        return ERR_PTR(error);
2250}
2251
2252/*
2253 * shmem_zero_setup - setup a shared anonymous mapping
2254 *
2255 * @vma: the vma to be mmapped is prepared by do_mmap_pgoff
2256 */
2257int shmem_zero_setup(struct vm_area_struct *vma)
2258{
2259        struct file *file;
2260        loff_t size = vma->vm_end - vma->vm_start;
2261
2262        file = shmem_file_setup("dev/zero", size, vma->vm_flags);
2263        if (IS_ERR(file))
2264                return PTR_ERR(file);
2265
2266        if (vma->vm_file)
2267                fput(vma->vm_file);
2268        vma->vm_file = file;
2269        vma->vm_ops = &shmem_vm_ops;
2270        return 0;
2271}
2272