RHEL4/fs/dcache.c
<<
>>
Prefs
   1/*
   2 * fs/dcache.c
   3 *
   4 * Complete reimplementation
   5 * (C) 1997 Thomas Schoebel-Theuer,
   6 * with heavy changes by Linus Torvalds
   7 */
   8
   9/*
  10 * Notes on the allocation strategy:
  11 *
  12 * The dcache is a master of the icache - whenever a dcache entry
  13 * exists, the inode will always exist. "iput()" is done either when
  14 * the dcache entry is deleted or garbage collected.
  15 */
  16
  17#include <linux/config.h>
  18#include <linux/string.h>
  19#include <linux/mm.h>
  20#include <linux/fs.h>
  21#include <linux/slab.h>
  22#include <linux/init.h>
  23#include <linux/smp_lock.h>
  24#include <linux/hash.h>
  25#include <linux/cache.h>
  26#include <linux/module.h>
  27#include <linux/mount.h>
  28#include <linux/file.h>
  29#include <asm/uaccess.h>
  30#include <linux/security.h>
  31#include <linux/seqlock.h>
  32#include <linux/swap.h>
  33#include <linux/bootmem.h>
  34#include <linux/audit.h>
  35
  36/* #define DCACHE_DEBUG 1 */
  37
  38int sysctl_vfs_cache_pressure = 100;
  39EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
  40
  41spinlock_t dcache_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
  42seqlock_t rename_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED;
  43
  44EXPORT_SYMBOL(dcache_lock);
  45
  46static kmem_cache_t *dentry_cache; 
  47
  48#define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))
  49
  50/*
  51 * This is the single most critical data structure when it comes
  52 * to the dcache: the hashtable for lookups. Somebody should try
  53 * to make this good - I've just made it work.
  54 *
  55 * This hash-function tries to avoid losing too many bits of hash
  56 * information, yet avoid using a prime hash-size or similar.
  57 */
  58#define D_HASHBITS     d_hash_shift
  59#define D_HASHMASK     d_hash_mask
  60
  61static unsigned int d_hash_mask;
  62static unsigned int d_hash_shift;
  63static struct hlist_head *dentry_hashtable;
  64static LIST_HEAD(dentry_unused);
  65
  66/* Statistics gathering. */
  67struct dentry_stat_t dentry_stat = {
  68        .age_limit = 45,
  69};
  70
  71static void d_callback(struct rcu_head *head)
  72{
  73        struct dentry * dentry = container_of(head, struct dentry, d_rcu);
  74
  75        if (dname_external(dentry))
  76                kfree(dentry->d_name.name);
  77        kmem_cache_free(dentry_cache, dentry); 
  78}
  79
  80/*
  81 * no dcache_lock, please.  The caller must decrement dentry_stat.nr_dentry
  82 * inside dcache_lock.
  83 */
  84static void d_free(struct dentry *dentry)
  85{
  86        if (dentry->d_op && dentry->d_op->d_release)
  87                dentry->d_op->d_release(dentry);
  88        if (dentry->d_extra_attributes) {
  89                kfree(dentry->d_extra_attributes);
  90                dentry->d_extra_attributes = NULL;
  91        }
  92        call_rcu(&dentry->d_rcu, d_callback);
  93}
  94
  95/*
  96 * Release the dentry's inode, using the filesystem
  97 * d_iput() operation if defined.
  98 * Called with dcache_lock and per dentry lock held, drops both.
  99 */
 100static inline void dentry_iput(struct dentry * dentry)
 101{
 102        struct inode *inode = dentry->d_inode;
 103        if (inode) {
 104                audit_update_watch(dentry, 1);
 105                dentry->d_inode = NULL;
 106                list_del_init(&dentry->d_alias);
 107                spin_unlock(&dentry->d_lock);
 108                spin_unlock(&dcache_lock);
 109                if (dentry->d_op && dentry->d_op->d_iput)
 110                        dentry->d_op->d_iput(dentry, inode);
 111                else
 112                        iput(inode);
 113        } else {
 114                spin_unlock(&dentry->d_lock);
 115                spin_unlock(&dcache_lock);
 116        }
 117}
 118
 119/* 
 120 * This is dput
 121 *
 122 * This is complicated by the fact that we do not want to put
 123 * dentries that are no longer on any hash chain on the unused
 124 * list: we'd much rather just get rid of them immediately.
 125 *
 126 * However, that implies that we have to traverse the dentry
 127 * tree upwards to the parents which might _also_ now be
 128 * scheduled for deletion (it may have been only waiting for
 129 * its last child to go away).
 130 *
 131 * This tail recursion is done by hand as we don't want to depend
 132 * on the compiler to always get this right (gcc generally doesn't).
 133 * Real recursion would eat up our stack space.
 134 */
 135
 136/*
 137 * dput - release a dentry
 138 * @dentry: dentry to release 
 139 *
 140 * Release a dentry. This will drop the usage count and if appropriate
 141 * call the dentry unlink method as well as removing it from the queues and
 142 * releasing its resources. If the parent dentries were scheduled for release
 143 * they too may now get deleted.
 144 *
 145 * no dcache lock, please.
 146 */
 147
 148void dput(struct dentry *dentry)
 149{
 150        if (!dentry)
 151                return;
 152
 153repeat:
 154        if (atomic_read(&dentry->d_count) == 1)
 155                might_sleep();
 156        if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
 157                return;
 158
 159        spin_lock(&dentry->d_lock);
 160        if (atomic_read(&dentry->d_count)) {
 161                spin_unlock(&dentry->d_lock);
 162                spin_unlock(&dcache_lock);
 163                return;
 164        }
 165                        
 166        /*
 167         * AV: ->d_delete() is _NOT_ allowed to block now.
 168         */
 169        if (dentry->d_op && dentry->d_op->d_delete) {
 170                if (dentry->d_op->d_delete(dentry))
 171                        goto unhash_it;
 172        }
 173        /* Unreachable? Get rid of it */
 174        if (d_unhashed(dentry))
 175                goto kill_it;
 176        if (list_empty(&dentry->d_lru)) {
 177                dentry->d_flags |= DCACHE_REFERENCED;
 178                list_add(&dentry->d_lru, &dentry_unused);
 179                dentry_stat.nr_unused++;
 180        }
 181        spin_unlock(&dentry->d_lock);
 182        spin_unlock(&dcache_lock);
 183        return;
 184
 185unhash_it:
 186        __d_drop(dentry);
 187
 188kill_it: {
 189                struct dentry *parent;
 190
 191                /* If dentry was on d_lru list
 192                 * delete it from there
 193                 */
 194                if (!list_empty(&dentry->d_lru)) {
 195                        list_del(&dentry->d_lru);
 196                        dentry_stat.nr_unused--;
 197                }
 198                list_del(&dentry->d_child);
 199                dentry_stat.nr_dentry--;        /* For d_free, below */
 200                /*drops the locks, at that point nobody can reach this dentry */
 201                dentry_iput(dentry);
 202                parent = dentry->d_parent;
 203                d_free(dentry);
 204                if (dentry == parent)
 205                        return;
 206                dentry = parent;
 207                goto repeat;
 208        }
 209}
 210
 211/**
 212 * d_invalidate - invalidate a dentry
 213 * @dentry: dentry to invalidate
 214 *
 215 * Try to invalidate the dentry if it turns out to be
 216 * possible. If there are other dentries that can be
 217 * reached through this one we can't delete it and we
 218 * return -EBUSY. On success we return 0.
 219 *
 220 * no dcache lock.
 221 */
 222 
 223int d_invalidate(struct dentry * dentry)
 224{
 225        /*
 226         * If it's already been dropped, return OK.
 227         */
 228        spin_lock(&dcache_lock);
 229        if (d_unhashed(dentry)) {
 230                spin_unlock(&dcache_lock);
 231                return 0;
 232        }
 233        /*
 234         * Check whether to do a partial shrink_dcache
 235         * to get rid of unused child entries.
 236         */
 237        if (!list_empty(&dentry->d_subdirs)) {
 238                spin_unlock(&dcache_lock);
 239                shrink_dcache_parent(dentry);
 240                spin_lock(&dcache_lock);
 241        }
 242
 243        /*
 244         * Somebody else still using it?
 245         *
 246         * If it's a directory, we can't drop it
 247         * for fear of somebody re-populating it
 248         * with children (even though dropping it
 249         * would make it unreachable from the root,
 250         * we might still populate it if it was a
 251         * working directory or similar).
 252         */
 253        spin_lock(&dentry->d_lock);
 254        if (atomic_read(&dentry->d_count) > 1) {
 255                if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
 256                        spin_unlock(&dentry->d_lock);
 257                        spin_unlock(&dcache_lock);
 258                        return -EBUSY;
 259                }
 260        }
 261
 262        __d_drop(dentry);
 263        spin_unlock(&dentry->d_lock);
 264        spin_unlock(&dcache_lock);
 265        return 0;
 266}
 267
 268/* This should be called _only_ with dcache_lock held */
 269
 270static inline struct dentry * __dget_locked(struct dentry *dentry)
 271{
 272        atomic_inc(&dentry->d_count);
 273        if (!list_empty(&dentry->d_lru)) {
 274                dentry_stat.nr_unused--;
 275                list_del_init(&dentry->d_lru);
 276        }
 277        return dentry;
 278}
 279
 280struct dentry * dget_locked(struct dentry *dentry)
 281{
 282        return __dget_locked(dentry);
 283}
 284
 285/**
 286 * d_find_alias - grab a hashed alias of inode
 287 * @inode: inode in question
 288 * @want_discon:  flag, used by d_splice_alias, to request
 289 *          that only a DISCONNECTED alias be returned.
 290 *
 291 * If inode has a hashed alias, or is a directory and has any alias,
 292 * acquire the reference to alias and return it. Otherwise return NULL.
 293 * Notice that if inode is a directory there can be only one alias and
 294 * it can be unhashed only if it has no children, or if it is the root
 295 * of a filesystem.
 296 *
 297 * If the inode has a DCACHE_DISCONNECTED alias, then prefer
 298 * any other hashed alias over that one unless @want_discon is set,
 299 * in which case only return a DCACHE_DISCONNECTED alias.
 300 */
 301
 302static struct dentry * __d_find_alias(struct inode *inode, int want_discon)
 303{
 304        struct list_head *head, *next, *tmp;
 305        struct dentry *alias, *discon_alias=NULL;
 306
 307        head = &inode->i_dentry;
 308        next = inode->i_dentry.next;
 309        while (next != head) {
 310                tmp = next;
 311                next = tmp->next;
 312                prefetch(next);
 313                alias = list_entry(tmp, struct dentry, d_alias);
 314                if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
 315                        if (alias->d_flags & DCACHE_DISCONNECTED)
 316                                discon_alias = alias;
 317                        else if (!want_discon) {
 318                                __dget_locked(alias);
 319                                return alias;
 320                        }
 321                }
 322        }
 323        if (discon_alias)
 324                __dget_locked(discon_alias);
 325        return discon_alias;
 326}
 327
 328struct dentry * d_find_alias(struct inode *inode)
 329{
 330        struct dentry *de;
 331        spin_lock(&dcache_lock);
 332        de = __d_find_alias(inode, 0);
 333        spin_unlock(&dcache_lock);
 334        return de;
 335}
 336
 337/*
 338 *      Try to kill dentries associated with this inode.
 339 * WARNING: you must own a reference to inode.
 340 */
 341void d_prune_aliases(struct inode *inode)
 342{
 343        struct list_head *tmp, *head = &inode->i_dentry;
 344restart:
 345        spin_lock(&dcache_lock);
 346        tmp = head;
 347        while ((tmp = tmp->next) != head) {
 348                struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
 349                if (!atomic_read(&dentry->d_count)) {
 350                        __dget_locked(dentry);
 351                        __d_drop(dentry);
 352                        spin_unlock(&dcache_lock);
 353                        dput(dentry);
 354                        goto restart;
 355                }
 356        }
 357        spin_unlock(&dcache_lock);
 358}
 359
 360/*
 361 * Throw away a dentry - free the inode, dput the parent.
 362 * This requires that the LRU list has already been
 363 * removed.
 364 * Called with dcache_lock, drops it and then regains.
 365 */
 366static inline void prune_one_dentry(struct dentry * dentry)
 367{
 368        struct dentry * parent;
 369
 370        __d_drop(dentry);
 371        list_del(&dentry->d_child);
 372        dentry_stat.nr_dentry--;        /* For d_free, below */
 373        dentry_iput(dentry);
 374        parent = dentry->d_parent;
 375        d_free(dentry);
 376        if (parent != dentry)
 377                dput(parent);
 378        spin_lock(&dcache_lock);
 379}
 380
 381/**
 382 * prune_dcache - shrink the dcache
 383 * @count: number of entries to try and free
 384 * @sb: if given, ignore dentries for other superblocks
 385 *         which are being unmounted.
 386 *
 387 * Shrink the dcache. This is done when we need
 388 * more memory, or simply when we need to unmount
 389 * something (at which point we need to unuse
 390 * all dentries).
 391 *
 392 * This function may fail to free any resources if
 393 * all the dentries are in use.
 394 */
 395 
 396static void prune_dcache(int count, struct super_block *sb)
 397{
 398        spin_lock(&dcache_lock);
 399        for (; count ; count--) {
 400                struct dentry *dentry;
 401                struct list_head *tmp;
 402                struct rw_semaphore *s_umount;
 403
 404                cond_resched_lock(&dcache_lock);
 405
 406                tmp = dentry_unused.prev;
 407                if (unlikely(sb)) {
 408                        /* Try to find a dentry for this sb, but don't try
 409                         * too hard, if they aren't near the tail they will
 410                         * be moved down again soon
 411                         */
 412                        int skip = count;
 413                        while (skip &&
 414                               tmp != &dentry_unused &&
 415                               list_entry(tmp, struct dentry, d_lru)->d_sb != sb) {
 416                                skip--;
 417                                tmp = tmp->prev;
 418                        }
 419                }
 420                if (tmp == &dentry_unused)
 421                        break;
 422                list_del_init(tmp);
 423                prefetch(dentry_unused.prev);
 424                dentry_stat.nr_unused--;
 425                dentry = list_entry(tmp, struct dentry, d_lru);
 426
 427                spin_lock(&dentry->d_lock);
 428                /*
 429                 * We found an inuse dentry which was not removed from
 430                 * dentry_unused because of laziness during lookup.  Do not free
 431                 * it - just keep it off the dentry_unused list.
 432                 */
 433                if (atomic_read(&dentry->d_count)) {
 434                        spin_unlock(&dentry->d_lock);
 435                        continue;
 436                }
 437                /* If the dentry was recently referenced, don't free it. */
 438                if (dentry->d_flags & DCACHE_REFERENCED) {
 439                        dentry->d_flags &= ~DCACHE_REFERENCED;
 440                        list_add(&dentry->d_lru, &dentry_unused);
 441                        dentry_stat.nr_unused++;
 442                        spin_unlock(&dentry->d_lock);
 443                        continue;
 444                }
 445                /*
 446                 * If the dentry is not DCACHED_REFERENCED, it is time
 447                 * to remove it from the dcache, provided the super block is
 448                 * NULL (which means we are trying to reclaim memory)
 449                 * or this dentry belongs to the same super block that
 450                 * we want to shrink.
 451                 */
 452                /*
 453                 * If this dentry is for "my" filesystem, then I can prune it
 454                 * without taking the s_umount lock (I already hold it).
 455                 */
 456                if (sb && dentry->d_sb == sb) {
 457                        prune_one_dentry(dentry);
 458                        continue;
 459                }
 460                /*
 461                 * ...otherwise we need to be sure this filesystem isn't being
 462                 * unmounted, otherwise we could race with
 463                 * generic_shutdown_super(), and end up holding a reference to
 464                 * an inode while the filesystem is unmounted.
 465                 * So we try to get s_umount, and make sure s_root isn't NULL.
 466                 * (Take a local copy of s_umount to avoid a use-after-free of
 467                 * `dentry').
 468                 */
 469                s_umount = &dentry->d_sb->s_umount;
 470                if (down_read_trylock(s_umount)) {
 471                        if (dentry->d_sb->s_root != NULL) {
 472                                prune_one_dentry(dentry);
 473                                up_read(s_umount);
 474                                continue;
 475                        }
 476                        up_read(s_umount);
 477                }
 478                spin_unlock(&dentry->d_lock);
 479                /* Cannot remove the first dentry, and it isn't appropriate
 480                 * to move it to the head of the list, so give up, and try
 481                 * later
 482                 */
 483                break;
 484        }
 485        spin_unlock(&dcache_lock);
 486}
 487
 488/*
 489 * Shrink the dcache for the specified super block.
 490 * This allows us to unmount a device without disturbing
 491 * the dcache for the other devices.
 492 *
 493 * This implementation makes just two traversals of the
 494 * unused list.  On the first pass we move the selected
 495 * dentries to the most recent end, and on the second
 496 * pass we free them.  The second pass must restart after
 497 * each dput(), but since the target dentries are all at
 498 * the end, it's really just a single traversal.
 499 */
 500
 501/**
 502 * shrink_dcache_sb - shrink dcache for a superblock
 503 * @sb: superblock
 504 *
 505 * Shrink the dcache for the specified super block. This
 506 * is used to free the dcache before unmounting a file
 507 * system
 508 */
 509
 510void shrink_dcache_sb(struct super_block * sb)
 511{
 512        struct list_head *tmp, *next;
 513        struct dentry *dentry;
 514
 515        /*
 516         * Pass one ... move the dentries for the specified
 517         * superblock to the most recent end of the unused list.
 518         */
 519        spin_lock(&dcache_lock);
 520        next = dentry_unused.next;
 521        while (next != &dentry_unused) {
 522                tmp = next;
 523                next = tmp->next;
 524                dentry = list_entry(tmp, struct dentry, d_lru);
 525                if (dentry->d_sb != sb)
 526                        continue;
 527                list_del(tmp);
 528                list_add(tmp, &dentry_unused);
 529        }
 530
 531        /*
 532         * Pass two ... free the dentries for this superblock.
 533         */
 534repeat:
 535        next = dentry_unused.next;
 536        while (next != &dentry_unused) {
 537                tmp = next;
 538                next = tmp->next;
 539                dentry = list_entry(tmp, struct dentry, d_lru);
 540                if (dentry->d_sb != sb)
 541                        continue;
 542                dentry_stat.nr_unused--;
 543                list_del_init(tmp);
 544                spin_lock(&dentry->d_lock);
 545                if (atomic_read(&dentry->d_count)) {
 546                        spin_unlock(&dentry->d_lock);
 547                        continue;
 548                }
 549                prune_one_dentry(dentry);
 550                goto repeat;
 551        }
 552        spin_unlock(&dcache_lock);
 553}
 554
 555/*
 556 * Search for at least 1 mount point in the dentry's subdirs.
 557 * We descend to the next level whenever the d_subdirs
 558 * list is non-empty and continue searching.
 559 */
 560 
 561/**
 562 * have_submounts - check for mounts over a dentry
 563 * @parent: dentry to check.
 564 *
 565 * Return true if the parent or its subdirectories contain
 566 * a mount point
 567 */
 568 
 569int have_submounts(struct dentry *parent)
 570{
 571        struct dentry *this_parent = parent;
 572        struct list_head *next;
 573
 574        spin_lock(&dcache_lock);
 575        if (d_mountpoint(parent))
 576                goto positive;
 577repeat:
 578        next = this_parent->d_subdirs.next;
 579resume:
 580        while (next != &this_parent->d_subdirs) {
 581                struct list_head *tmp = next;
 582                struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
 583                next = tmp->next;
 584                /* Have we found a mount point ? */
 585                if (d_mountpoint(dentry))
 586                        goto positive;
 587                if (!list_empty(&dentry->d_subdirs)) {
 588                        this_parent = dentry;
 589                        goto repeat;
 590                }
 591        }
 592        /*
 593         * All done at this level ... ascend and resume the search.
 594         */
 595        if (this_parent != parent) {
 596                next = this_parent->d_child.next; 
 597                this_parent = this_parent->d_parent;
 598                goto resume;
 599        }
 600        spin_unlock(&dcache_lock);
 601        return 0; /* No mount points found in tree */
 602positive:
 603        spin_unlock(&dcache_lock);
 604        return 1;
 605}
 606
 607/*
 608 * Search the dentry child list for the specified parent,
 609 * and move any unused dentries to the end of the unused
 610 * list for prune_dcache(). We descend to the next level
 611 * whenever the d_subdirs list is non-empty and continue
 612 * searching.
 613 */
 614static int select_parent(struct dentry * parent)
 615{
 616        struct dentry *this_parent = parent;
 617        struct list_head *next;
 618        int found = 0;
 619
 620        spin_lock(&dcache_lock);
 621repeat:
 622        next = this_parent->d_subdirs.next;
 623resume:
 624        while (next != &this_parent->d_subdirs) {
 625                struct list_head *tmp = next;
 626                struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
 627                next = tmp->next;
 628
 629                if (!list_empty(&dentry->d_lru)) {
 630                        dentry_stat.nr_unused--;
 631                        list_del_init(&dentry->d_lru);
 632                }
 633                /* 
 634                 * move only zero ref count dentries to the end 
 635                 * of the unused list for prune_dcache
 636                 */
 637                if (!atomic_read(&dentry->d_count)) {
 638                        list_add(&dentry->d_lru, dentry_unused.prev);
 639                        dentry_stat.nr_unused++;
 640                        found++;
 641                }
 642                /*
 643                 * Descend a level if the d_subdirs list is non-empty.
 644                 */
 645                if (!list_empty(&dentry->d_subdirs)) {
 646                        this_parent = dentry;
 647#ifdef DCACHE_DEBUG
 648printk(KERN_DEBUG "select_parent: descending to %s/%s, found=%d\n",
 649dentry->d_parent->d_name.name, dentry->d_name.name, found);
 650#endif
 651                        goto repeat;
 652                }
 653        }
 654        /*
 655         * All done at this level ... ascend and resume the search.
 656         */
 657        if (this_parent != parent) {
 658                next = this_parent->d_child.next; 
 659                this_parent = this_parent->d_parent;
 660#ifdef DCACHE_DEBUG
 661printk(KERN_DEBUG "select_parent: ascending to %s/%s, found=%d\n",
 662this_parent->d_parent->d_name.name, this_parent->d_name.name, found);
 663#endif
 664                goto resume;
 665        }
 666        spin_unlock(&dcache_lock);
 667        return found;
 668}
 669
 670/**
 671 * shrink_dcache_parent - prune dcache
 672 * @parent: parent of entries to prune
 673 *
 674 * Prune the dcache to remove unused children of the parent dentry.
 675 */
 676 
 677void shrink_dcache_parent(struct dentry * parent)
 678{
 679        int found;
 680
 681        while ((found = select_parent(parent)) != 0)
 682                prune_dcache(found, parent->d_sb);
 683}
 684
 685/**
 686 * shrink_dcache_anon - further prune the cache
 687 * @head: head of d_hash list of dentries to prune
 688 *
 689 * Prune the dentries that are anonymous
 690 *
 691 * parsing d_hash list does not hlist_for_each_rcu() as it
 692 * done under dcache_lock.
 693 *
 694 */
 695void shrink_dcache_anon(struct super_block *sb)
 696{
 697        struct hlist_node *lp;
 698        struct hlist_head *head = &sb->s_anon;
 699        int found;
 700        do {
 701                found = 0;
 702                spin_lock(&dcache_lock);
 703                hlist_for_each(lp, head) {
 704                        struct dentry *this = hlist_entry(lp, struct dentry, d_hash);
 705                        if (!list_empty(&this->d_lru)) {
 706                                dentry_stat.nr_unused--;
 707                                list_del_init(&this->d_lru);
 708                        }
 709
 710                        /* 
 711                         * move only zero ref count dentries to the end 
 712                         * of the unused list for prune_dcache
 713                         */
 714                        if (!atomic_read(&this->d_count)) {
 715                                list_add_tail(&this->d_lru, &dentry_unused);
 716                                dentry_stat.nr_unused++;
 717                                found++;
 718                        }
 719                }
 720                spin_unlock(&dcache_lock);
 721                prune_dcache(found, sb);
 722        } while(found);
 723}
 724
 725/*
 726 * Scan `nr' dentries and return the number which remain.
 727 *
 728 * We need to avoid reentering the filesystem if the caller is performing a
 729 * GFP_NOFS allocation attempt.  One example deadlock is:
 730 *
 731 * ext2_new_block->getblk->GFP->shrink_dcache_memory->prune_dcache->
 732 * prune_one_dentry->dput->dentry_iput->iput->inode->i_sb->s_op->put_inode->
 733 * ext2_discard_prealloc->ext2_free_blocks->lock_super->DEADLOCK.
 734 *
 735 * In this case we return -1 to tell the caller that we baled.
 736 */
 737static int shrink_dcache_memory(int nr, unsigned int gfp_mask)
 738{
 739        if (nr) {
 740                if (!(gfp_mask & __GFP_FS))
 741                        return -1;
 742                prune_dcache(nr, NULL);
 743        }
 744        return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
 745}
 746
 747/**
 748 * d_alloc      -       allocate a dcache entry
 749 * @parent: parent of entry to allocate
 750 * @name: qstr of the name
 751 *
 752 * Allocates a dentry. It returns %NULL if there is insufficient memory
 753 * available. On a success the dentry is returned. The name passed in is
 754 * copied and the copy passed in may be reused after this call.
 755 */
 756 
 757struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
 758{
 759        struct dentry *dentry;
 760        char *dname;
 761
 762        dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL); 
 763        if (!dentry)
 764                return NULL;
 765
 766        if (name->len > DNAME_INLINE_LEN-1) {
 767                dname = kmalloc(name->len + 1, GFP_KERNEL);
 768                if (!dname) {
 769                        kmem_cache_free(dentry_cache, dentry); 
 770                        return NULL;
 771                }
 772        } else  {
 773                dname = dentry->d_iname;
 774        }       
 775        dentry->d_name.name = dname;
 776
 777        dentry->d_name.len = name->len;
 778        dentry->d_name.hash = name->hash;
 779        memcpy(dname, name->name, name->len);
 780        dname[name->len] = 0;
 781
 782        atomic_set(&dentry->d_count, 1);
 783        dentry->d_flags = DCACHE_UNHASHED;
 784        dentry->d_lock = SPIN_LOCK_UNLOCKED;
 785        dentry->d_inode = NULL;
 786        dentry->d_parent = NULL;
 787        dentry->d_sb = NULL;
 788        dentry->d_op = NULL;
 789        dentry->d_fsdata = NULL;
 790        dentry->d_extra_attributes = NULL;
 791        dentry->d_mounted = 0;
 792        dentry->d_cookie = NULL;
 793        dentry->d_bucket = NULL;
 794        INIT_HLIST_NODE(&dentry->d_hash);
 795        INIT_LIST_HEAD(&dentry->d_lru);
 796        INIT_LIST_HEAD(&dentry->d_subdirs);
 797        INIT_LIST_HEAD(&dentry->d_alias);
 798
 799        if (parent) {
 800                dentry->d_parent = dget(parent);
 801                dentry->d_sb = parent->d_sb;
 802        } else {
 803                INIT_LIST_HEAD(&dentry->d_child);
 804        }
 805
 806        spin_lock(&dcache_lock);
 807        if (parent)
 808                list_add(&dentry->d_child, &parent->d_subdirs);
 809        dentry_stat.nr_dentry++;
 810        spin_unlock(&dcache_lock);
 811
 812        return dentry;
 813}
 814
 815/**
 816 * d_instantiate - fill in inode information for a dentry
 817 * @entry: dentry to complete
 818 * @inode: inode to attach to this dentry
 819 *
 820 * Fill in inode information in the entry.
 821 *
 822 * This turns negative dentries into productive full members
 823 * of society.
 824 *
 825 * NOTE! This assumes that the inode count has been incremented
 826 * (or otherwise set) by the caller to indicate that it is now
 827 * in use by the dcache.
 828 */
 829 
 830void d_instantiate(struct dentry *entry, struct inode * inode)
 831{
 832        if (!list_empty(&entry->d_alias)) BUG();
 833        spin_lock(&dcache_lock);
 834        if (inode)
 835                list_add(&entry->d_alias, &inode->i_dentry);
 836        entry->d_inode = inode;
 837        audit_update_watch(entry, 0);
 838        spin_unlock(&dcache_lock);
 839        security_d_instantiate(entry, inode);
 840}
 841
 842/**
 843 * d_alloc_root - allocate root dentry
 844 * @root_inode: inode to allocate the root for
 845 *
 846 * Allocate a root ("/") dentry for the inode given. The inode is
 847 * instantiated and returned. %NULL is returned if there is insufficient
 848 * memory or the inode passed is %NULL.
 849 */
 850 
 851struct dentry * d_alloc_root(struct inode * root_inode)
 852{
 853        struct dentry *res = NULL;
 854
 855        if (root_inode) {
 856                static const struct qstr name = { .name = "/", .len = 1 };
 857
 858                res = d_alloc(NULL, &name);
 859                if (res) {
 860                        res->d_sb = root_inode->i_sb;
 861                        res->d_parent = res;
 862                        d_instantiate(res, root_inode);
 863                }
 864        }
 865        return res;
 866}
 867
 868static inline struct hlist_head *d_hash(struct dentry *parent,
 869                                        unsigned long hash)
 870{
 871        hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
 872        hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
 873        return dentry_hashtable + (hash & D_HASHMASK);
 874}
 875
 876/**
 877 * d_alloc_anon - allocate an anonymous dentry
 878 * @inode: inode to allocate the dentry for
 879 *
 880 * This is similar to d_alloc_root.  It is used by filesystems when
 881 * creating a dentry for a given inode, often in the process of 
 882 * mapping a filehandle to a dentry.  The returned dentry may be
 883 * anonymous, or may have a full name (if the inode was already
 884 * in the cache).  The file system may need to make further
 885 * efforts to connect this dentry into the dcache properly.
 886 *
 887 * When called on a directory inode, we must ensure that
 888 * the inode only ever has one dentry.  If a dentry is
 889 * found, that is returned instead of allocating a new one.
 890 *
 891 * On successful return, the reference to the inode has been transferred
 892 * to the dentry.  If %NULL is returned (indicating kmalloc failure),
 893 * the reference on the inode has not been released.
 894 */
 895
 896struct dentry * d_alloc_anon(struct inode *inode)
 897{
 898        static const struct qstr anonstring = { .name = "" };
 899        struct dentry *tmp;
 900        struct dentry *res;
 901
 902        if ((res = d_find_alias(inode))) {
 903                iput(inode);
 904                return res;
 905        }
 906
 907        tmp = d_alloc(NULL, &anonstring);
 908        if (!tmp)
 909                return NULL;
 910
 911        tmp->d_parent = tmp; /* make sure dput doesn't croak */
 912        
 913        spin_lock(&dcache_lock);
 914        res = __d_find_alias(inode, 0);
 915        if (!res) {
 916                /* attach a disconnected dentry */
 917                res = tmp;
 918                tmp = NULL;
 919                spin_lock(&res->d_lock);
 920                res->d_sb = inode->i_sb;
 921                res->d_parent = res;
 922                res->d_inode = inode;
 923
 924                /*
 925                 * Set d_bucket to an "impossible" bucket address so
 926                 * that d_move() doesn't get a false positive
 927                 */
 928                res->d_bucket = NULL;
 929                res->d_flags |= DCACHE_DISCONNECTED;
 930                res->d_flags &= ~DCACHE_UNHASHED;
 931                list_add(&res->d_alias, &inode->i_dentry);
 932                hlist_add_head(&res->d_hash, &inode->i_sb->s_anon);
 933                spin_unlock(&res->d_lock);
 934
 935                inode = NULL; /* don't drop reference */
 936        }
 937        spin_unlock(&dcache_lock);
 938
 939        if (inode)
 940                iput(inode);
 941        if (tmp)
 942                dput(tmp);
 943        return res;
 944}
 945
 946
 947/**
 948 * d_splice_alias - splice a disconnected dentry into the tree if one exists
 949 * @inode:  the inode which may have a disconnected dentry
 950 * @dentry: a negative dentry which we want to point to the inode.
 951 *
 952 * If inode is a directory and has a 'disconnected' dentry (i.e. IS_ROOT and
 953 * DCACHE_DISCONNECTED), then d_move that in place of the given dentry
 954 * and return it, else simply d_add the inode to the dentry and return NULL.
 955 *
 956 * This is needed in the lookup routine of any filesystem that is exportable
 957 * (via knfsd) so that we can build dcache paths to directories effectively.
 958 *
 959 * If a dentry was found and moved, then it is returned.  Otherwise NULL
 960 * is returned.  This matches the expected return value of ->lookup.
 961 *
 962 */
 963struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
 964{
 965        struct dentry *new = NULL;
 966
 967        if (inode) {
 968                spin_lock(&dcache_lock);
 969                new = __d_find_alias(inode, 1);
 970                if (new) {
 971                        BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
 972                        spin_unlock(&dcache_lock);
 973                        security_d_instantiate(new, inode);
 974                        d_rehash(dentry);
 975                        d_move(new, dentry);
 976                        iput(inode);
 977                } else {
 978                        /* d_instantiate takes dcache_lock, so we do it by hand */
 979                        list_add(&dentry->d_alias, &inode->i_dentry);
 980                        dentry->d_inode = inode;
 981                        audit_update_watch(dentry, 0);
 982                        spin_unlock(&dcache_lock);
 983                        security_d_instantiate(dentry, inode);
 984                        d_rehash(dentry);
 985                }
 986        } else
 987                d_add(dentry, inode);
 988        return new;
 989}
 990
 991
 992/**
 993 * d_lookup - search for a dentry
 994 * @parent: parent dentry
 995 * @name: qstr of name we wish to find
 996 *
 997 * Searches the children of the parent dentry for the name in question. If
 998 * the dentry is found its reference count is incremented and the dentry
 999 * is returned. The caller must use d_put to free the entry when it has
1000 * finished using it. %NULL is returned on failure.
1001 *
1002 * __d_lookup is dcache_lock free. The hash list is protected using RCU.
1003 * Memory barriers are used while updating and doing lockless traversal. 
1004 * To avoid races with d_move while rename is happening, d_lock is used.
1005 *
1006 * Overflows in memcmp(), while d_move, are avoided by keeping the length
1007 * and name pointer in one structure pointed by d_qstr.
1008 *
1009 * rcu_read_lock() and rcu_read_unlock() are used to disable preemption while
1010 * lookup is going on.
1011 *
1012 * dentry_unused list is not updated even if lookup finds the required dentry
1013 * in there. It is updated in places such as prune_dcache, shrink_dcache_sb,
1014 * select_parent and __dget_locked. This laziness saves lookup from dcache_lock
1015 * acquisition.
1016 *
1017 * d_lookup() is protected against the concurrent renames in some unrelated
1018 * directory using the seqlockt_t rename_lock.
1019 */
1020
1021struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
1022{
1023        struct dentry * dentry = NULL;
1024        unsigned long seq;
1025
1026        do {
1027                seq = read_seqbegin(&rename_lock);
1028                dentry = __d_lookup(parent, name);
1029                if (dentry)
1030                        break;
1031        } while (read_seqretry(&rename_lock, seq));
1032        return dentry;
1033}
1034
1035struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
1036{
1037        unsigned int len = name->len;
1038        unsigned int hash = name->hash;
1039        const unsigned char *str = name->name;
1040        struct hlist_head *head = d_hash(parent,hash);
1041        struct dentry *found = NULL;
1042        struct hlist_node *node;
1043
1044        rcu_read_lock();
1045        
1046        hlist_for_each_rcu(node, head) {
1047                struct dentry *dentry; 
1048                struct qstr *qstr;
1049
1050                dentry = hlist_entry(node, struct dentry, d_hash);
1051
1052                smp_rmb();
1053
1054                if (dentry->d_name.hash != hash)
1055                        continue;
1056                if (dentry->d_parent != parent)
1057                        continue;
1058
1059                spin_lock(&dentry->d_lock);
1060
1061                /*
1062                 * If lookup ends up in a different bucket due to concurrent
1063                 * rename, fail it
1064                 */
1065                if (unlikely(dentry->d_bucket != head))
1066                        goto terminate;
1067
1068                /*
1069                 * Recheck the dentry after taking the lock - d_move may have
1070                 * changed things.  Don't bother checking the hash because we're
1071                 * about to compare the whole name anyway.
1072                 */
1073                if (dentry->d_parent != parent)
1074                        goto next;
1075
1076                qstr = rcu_dereference(&dentry->d_name);
1077                if (parent->d_op && parent->d_op->d_compare) {
1078                        if (parent->d_op->d_compare(parent, qstr, name))
1079                                goto next;
1080                } else {
1081                        if (qstr->len != len)
1082                                goto next;
1083                        if (memcmp(qstr->name, str, len))
1084                                goto next;
1085                }
1086
1087                if (!d_unhashed(dentry)) {
1088                        atomic_inc(&dentry->d_count);
1089                        found = dentry;
1090                        audit_update_watch(dentry, 0);
1091                }
1092terminate:
1093                spin_unlock(&dentry->d_lock);
1094                break;
1095next:
1096                spin_unlock(&dentry->d_lock);
1097        }
1098        rcu_read_unlock();
1099
1100        return found;
1101}
1102
1103/**
1104 * d_validate - verify dentry provided from insecure source
1105 * @dentry: The dentry alleged to be valid child of @dparent
1106 * @dparent: The parent dentry (known to be valid)
1107 * @hash: Hash of the dentry
1108 * @len: Length of the name
1109 *
1110 * An insecure source has sent us a dentry, here we verify it and dget() it.
1111 * This is used by ncpfs in its readdir implementation.
1112 * Zero is returned in the dentry is invalid.
1113 */
1114 
1115int d_validate(struct dentry *dentry, struct dentry *dparent)
1116{
1117        struct hlist_head *base;
1118        struct hlist_node *lhp;
1119
1120        /* Check whether the ptr might be valid at all.. */
1121        if (!kmem_ptr_validate(dentry_cache, dentry))
1122                goto out;
1123
1124        if (dentry->d_parent != dparent)
1125                goto out;
1126
1127        spin_lock(&dcache_lock);
1128        base = d_hash(dparent, dentry->d_name.hash);
1129        hlist_for_each(lhp,base) { 
1130                /* hlist_for_each_rcu() not required for d_hash list
1131                 * as it is parsed under dcache_lock
1132                 */
1133                if (dentry == hlist_entry(lhp, struct dentry, d_hash)) {
1134                        __dget_locked(dentry);
1135                        spin_unlock(&dcache_lock);
1136                        return 1;
1137                }
1138        }
1139        spin_unlock(&dcache_lock);
1140out:
1141        return 0;
1142}
1143
1144/*
1145 * When a file is deleted, we have two options:
1146 * - turn this dentry into a negative dentry
1147 * - unhash this dentry and free it.
1148 *
1149 * Usually, we want to just turn this into
1150 * a negative dentry, but if anybody else is
1151 * currently using the dentry or the inode
1152 * we can't do that and we fall back on removing
1153 * it from the hash queues and waiting for
1154 * it to be deleted later when it has no users
1155 */
1156 
1157/**
1158 * d_delete - delete a dentry
1159 * @dentry: The dentry to delete
1160 *
1161 * Turn the dentry into a negative dentry if possible, otherwise
1162 * remove it from the hash queues so it can be deleted later
1163 */
1164 
1165void d_delete(struct dentry * dentry)
1166{
1167        /*
1168         * Are we the only user?
1169         */
1170        spin_lock(&dcache_lock);
1171        spin_lock(&dentry->d_lock);
1172        if (atomic_read(&dentry->d_count) == 1) {
1173                dentry_iput(dentry);
1174                return;
1175        }
1176
1177        if (!d_unhashed(dentry))
1178                __d_drop(dentry);
1179
1180        spin_unlock(&dentry->d_lock);
1181        spin_unlock(&dcache_lock);
1182}
1183
1184static void __d_rehash(struct dentry * entry, struct hlist_head *list)
1185{
1186
1187        entry->d_flags &= ~DCACHE_UNHASHED;
1188        entry->d_bucket = list;
1189        hlist_add_head_rcu(&entry->d_hash, list);
1190}
1191
1192static void _d_rehash(struct dentry * entry)
1193{
1194        __d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
1195}
1196
1197/**
1198 * d_rehash     - add an entry back to the hash
1199 * @entry: dentry to add to the hash
1200 *
1201 * Adds a dentry to the hash according to its name.
1202 */
1203 
1204void d_rehash(struct dentry * entry)
1205{
1206        struct hlist_head *list = d_hash(entry->d_parent, entry->d_name.hash);
1207
1208        spin_lock(&dcache_lock);
1209        spin_lock(&entry->d_lock);
1210        entry->d_flags &= ~DCACHE_UNHASHED;
1211        spin_unlock(&entry->d_lock);
1212        entry->d_bucket = list;
1213        hlist_add_head_rcu(&entry->d_hash, list);
1214        spin_unlock(&dcache_lock);
1215}
1216
1217#define do_switch(x,y) do { \
1218        __typeof__ (x) __tmp = x; \
1219        x = y; y = __tmp; } while (0)
1220
1221/*
1222 * When switching names, the actual string doesn't strictly have to
1223 * be preserved in the target - because we're dropping the target
1224 * anyway. As such, we can just do a simple memcpy() to copy over
1225 * the new name before we switch.
1226 *
1227 * Note that we have to be a lot more careful about getting the hash
1228 * switched - we have to switch the hash value properly even if it
1229 * then no longer matches the actual (corrupted) string of the target.
1230 * The hash value has to match the hash queue that the dentry is on..
1231 */
1232static void switch_names(struct dentry *dentry, struct dentry *target)
1233{
1234        if (dname_external(target)) {
1235                if (dname_external(dentry)) {
1236                        /*
1237                         * Both external: swap the pointers
1238                         */
1239                        do_switch(target->d_name.name, dentry->d_name.name);
1240                } else {
1241                        /*
1242                         * dentry:internal, target:external.  Steal target's
1243                         * storage and make target internal.
1244                         */
1245                        dentry->d_name.name = target->d_name.name;
1246                        target->d_name.name = target->d_iname;
1247                }
1248        } else {
1249                if (dname_external(dentry)) {
1250                        /*
1251                         * dentry:external, target:internal.  Give dentry's
1252                         * storage to target and make dentry internal
1253                         */
1254                        memcpy(dentry->d_iname, target->d_name.name,
1255                                        target->d_name.len + 1);
1256                        target->d_name.name = dentry->d_name.name;
1257                        dentry->d_name.name = dentry->d_iname;
1258                } else {
1259                        /*
1260                         * Both are internal.  Just copy target to dentry
1261                         */
1262                        memcpy(dentry->d_iname, target->d_name.name,
1263                                        target->d_name.len + 1);
1264                }
1265        }
1266}
1267
1268/*
1269 * We cannibalize "target" when moving dentry on top of it,
1270 * because it's going to be thrown away anyway. We could be more
1271 * polite about it, though.
1272 *
1273 * This forceful removal will result in ugly /proc output if
1274 * somebody holds a file open that got deleted due to a rename.
1275 * We could be nicer about the deleted file, and let it show
1276 * up under the name it got deleted rather than the name that
1277 * deleted it.
1278 */
1279 
1280/**
1281 * d_move_locked - move a dentry
1282 * @dentry: entry to move
1283 * @target: new dentry
1284 *
1285 * Update the dcache to reflect the move of a file name. Negative
1286 * dcache entries should not be moved in this way.
1287 */
1288
1289static void d_move_locked(struct dentry * dentry, struct dentry * target)
1290{
1291        struct hlist_head *list;
1292
1293        if (!dentry->d_inode)
1294                printk(KERN_WARNING "VFS: moving negative dcache entry\n");
1295
1296        write_seqlock(&rename_lock);
1297        /*
1298         * XXXX: do we really need to take target->d_lock?
1299         */
1300        if (target < dentry) {
1301                spin_lock(&target->d_lock);
1302                spin_lock(&dentry->d_lock);
1303        } else {
1304                spin_lock(&dentry->d_lock);
1305                spin_lock(&target->d_lock);
1306        }
1307
1308        audit_update_watch(dentry, 1);
1309        audit_update_watch(target, 1);
1310
1311        /* Move the dentry to the target hash queue, if on different bucket */
1312        if (dentry->d_flags & DCACHE_UNHASHED)
1313                goto already_unhashed;
1314        if (dentry->d_bucket != target->d_bucket) {
1315                hlist_del_rcu(&dentry->d_hash);
1316already_unhashed:
1317                if (target->d_flags & DCACHE_UNHASHED)
1318                        list = d_hash(target->d_parent, target->d_name.hash);
1319                else
1320                        list = target->d_bucket;
1321                __d_rehash(dentry, list);
1322        }
1323
1324        /* Unhash the target: dput() will then get rid of it */
1325        __d_drop(target);
1326
1327        /* flush any possible attributes */
1328        if (dentry->d_extra_attributes) {
1329                kfree(dentry->d_extra_attributes);
1330                dentry->d_extra_attributes = NULL;
1331        }
1332        if (target->d_extra_attributes) {
1333                kfree(target->d_extra_attributes);
1334                target->d_extra_attributes = NULL;
1335        }
1336
1337        list_del(&dentry->d_child);
1338        list_del(&target->d_child);
1339
1340        /* Switch the names.. */
1341        switch_names(dentry, target);
1342        smp_wmb();
1343        do_switch(dentry->d_name.len, target->d_name.len);
1344        do_switch(dentry->d_name.hash, target->d_name.hash);
1345
1346        /* ... and switch the parents */
1347        if (IS_ROOT(dentry)) {
1348                dentry->d_parent = target->d_parent;
1349                target->d_parent = target;
1350                INIT_LIST_HEAD(&target->d_child);
1351        } else {
1352                do_switch(dentry->d_parent, target->d_parent);
1353
1354                /* And add them back to the (new) parent lists */
1355                list_add(&target->d_child, &target->d_parent->d_subdirs);
1356        }
1357
1358        audit_update_watch(dentry, 0);
1359        list_add(&dentry->d_child, &dentry->d_parent->d_subdirs);
1360        spin_unlock(&target->d_lock);
1361        spin_unlock(&dentry->d_lock);
1362        write_sequnlock(&rename_lock);
1363}
1364
1365/**
1366 * d_move - move a dentry
1367 * @dentry: entry to move
1368 * @target: new dentry
1369 *
1370 * Update the dcache to reflect the move of a file name. Negative
1371 * dcache entries should not be moved in this way.
1372 */
1373
1374void d_move(struct dentry * dentry, struct dentry * target)
1375{
1376        spin_lock(&dcache_lock);
1377        d_move_locked(dentry, target);
1378        spin_unlock(&dcache_lock);
1379}
1380
1381/*
1382 * Helper that returns 1 if p1 is a parent of p2, else 0
1383 */
1384static int d_isparent(struct dentry *p1, struct dentry *p2)
1385{
1386        struct dentry *p;
1387
1388        for (p = p2; p->d_parent != p; p = p->d_parent) {
1389                if (p->d_parent == p1)
1390                        return 1;
1391        }
1392        return 0;
1393}
1394
1395/*
1396 * This helper attempts to cope with remotely renamed directories
1397 *
1398 * It assumes that the caller is already holding
1399 * dentry->d_parent->d_inode->i_mutex and the dcache_lock
1400 *
1401 * Note: If ever the locking in lock_rename() changes, then please
1402 * remember to update this too...
1403 *
1404 * On return, dcache_lock will have been unlocked.
1405 */
1406static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)
1407{
1408        struct semaphore *m1 = NULL, *m2 = NULL;
1409        struct dentry *ret;
1410
1411        /* If alias and dentry share a parent, then no extra locks required */
1412        if (alias->d_parent == dentry->d_parent)
1413                goto out_unalias;
1414
1415        /* Check for loops */
1416        ret = ERR_PTR(-ELOOP);
1417        if (d_isparent(alias, dentry))
1418                goto out_err;
1419
1420        /* See lock_rename() */
1421        ret = ERR_PTR(-EBUSY);
1422        if (down_trylock(&dentry->d_sb->s_vfs_rename_sem))
1423                goto out_err;
1424        m1 = &dentry->d_sb->s_vfs_rename_sem;
1425        if (down_trylock(&alias->d_parent->d_inode->i_sem))
1426                goto out_err;
1427        m2 = &alias->d_parent->d_inode->i_sem;
1428out_unalias:
1429        d_move_locked(alias, dentry);
1430        ret = alias;
1431out_err:
1432        spin_unlock(&dcache_lock);
1433        if (m2)
1434                up(m2);
1435        if (m1)
1436                up(m1);
1437        return ret;
1438}
1439
1440/*
1441 * Prepare an anonymous dentry for life in the superblock's dentry tree as a
1442 * named dentry in place of the dentry to be replaced.
1443 */
1444static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
1445{
1446        struct dentry *dparent, *aparent;
1447
1448        switch_names(dentry, anon);
1449        do_switch(dentry->d_name.len, anon->d_name.len);
1450        do_switch(dentry->d_name.hash, anon->d_name.hash);
1451
1452        dparent = dentry->d_parent;
1453        aparent = anon->d_parent;
1454
1455        dentry->d_parent = (aparent == anon) ? dentry : aparent;
1456        list_del(&dentry->d_child);
1457        if (!IS_ROOT(dentry))
1458                list_add(&dentry->d_child, &dentry->d_parent->d_subdirs);
1459        else
1460                INIT_LIST_HEAD(&dentry->d_child);
1461
1462        anon->d_parent = (dparent == dentry) ? anon : dparent;
1463        list_del(&anon->d_child);
1464        if (!IS_ROOT(anon))
1465                list_add(&anon->d_child, &anon->d_parent->d_subdirs);
1466        else
1467                INIT_LIST_HEAD(&anon->d_child);
1468
1469        anon->d_flags &= ~DCACHE_DISCONNECTED;
1470}
1471
1472/**
1473 * d_instantiate_unique - instantiate a non-aliased dentry
1474 * @entry: dentry to instantiate
1475 * @inode: inode to attach to this dentry
1476 *
1477 * Fill in inode information in the entry. On success, it returns NULL.
1478 * If an unhashed alias of "entry" already exists, then we return the
1479 * aliased dentry instead and drop one reference to inode.
1480 *
1481 * Note that in order to avoid conflicts with rename() etc, the caller
1482 * had better be holding the parent directory semaphore.
1483 *
1484 * This also assumes that the inode count has been incremented
1485 * (or otherwise set) by the caller to indicate that it is now
1486 * in use by the dcache.
1487 */
1488static struct dentry *__d_instantiate_unique(struct dentry *entry,
1489                                        struct inode *inode)
1490{
1491        struct dentry *alias;
1492        int len = entry->d_name.len;
1493        const char *name = entry->d_name.name;
1494        unsigned int hash = entry->d_name.hash;
1495
1496        if (!inode) {
1497                entry->d_inode = NULL;
1498                return NULL;
1499        }
1500
1501        list_for_each_entry(alias, &inode->i_dentry, d_alias) {
1502                struct qstr *qstr = &alias->d_name;
1503
1504                if (qstr->hash != hash)
1505                        continue;
1506                if (alias->d_parent != entry->d_parent)
1507                        continue;
1508                if (qstr->len != len)
1509                        continue;
1510                if (memcmp(qstr->name, name, len))
1511                        continue;
1512                dget_locked(alias);
1513                return alias;
1514        }
1515
1516        list_add(&entry->d_alias, &inode->i_dentry);
1517        entry->d_inode = inode;
1518        //fsnotify_d_instantiate(entry, inode);
1519        return NULL;
1520}
1521
1522struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
1523{
1524        struct dentry *actual;
1525
1526        BUG_ON(!d_unhashed(dentry));
1527
1528        spin_lock(&dcache_lock);
1529
1530        if (!inode) {
1531                actual = dentry;
1532                dentry->d_inode = NULL;
1533                goto found_lock;
1534        }
1535
1536        if (S_ISDIR(inode->i_mode)) {
1537                struct dentry *alias;
1538
1539                /* Does an aliased dentry already exist? */
1540                alias = __d_find_alias(inode, 0);
1541                if (alias) {
1542                        actual = alias;
1543                        /* Is this an anonymous mountpoint that we could splice
1544                         * into our tree? */
1545                        if (IS_ROOT(alias)) {
1546                                spin_lock(&alias->d_lock);
1547                                __d_materialise_dentry(dentry, alias);
1548                                __d_drop(alias);
1549                                goto found;
1550                        }
1551                        /* Nope, but we must(!) avoid directory aliasing */
1552                        actual = __d_unalias(dentry, alias);
1553                        if (IS_ERR(actual))
1554                                dput(alias);
1555                        goto out_nolock;
1556                }
1557        }
1558
1559        /* Add a unique reference */
1560        actual = __d_instantiate_unique(dentry, inode);
1561        if (!actual)
1562                actual = dentry;
1563        else if (unlikely(!d_unhashed(actual)))
1564                goto shouldnt_be_hashed;
1565
1566found_lock:
1567        spin_lock(&actual->d_lock);
1568found:
1569        _d_rehash(actual);
1570        spin_unlock(&actual->d_lock);
1571        spin_unlock(&dcache_lock);
1572out_nolock:
1573        if (actual == dentry) {
1574                security_d_instantiate(dentry, inode);
1575                return NULL;
1576        }
1577
1578        iput(inode);
1579        return actual;
1580
1581shouldnt_be_hashed:
1582        spin_unlock(&dcache_lock);
1583        BUG();
1584        goto shouldnt_be_hashed;
1585}
1586
1587struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
1588{
1589        struct dentry *result;
1590
1591        BUG_ON(!list_empty(&entry->d_alias));
1592
1593        spin_lock(&dcache_lock);
1594        result = __d_instantiate_unique(entry, inode);
1595        spin_unlock(&dcache_lock);
1596
1597        if (!result) {
1598                security_d_instantiate(entry, inode);
1599                return NULL;
1600        }
1601
1602        BUG_ON(!d_unhashed(result));
1603        iput(inode);
1604        return result;
1605}
1606
1607/**
1608 * d_path - return the path of a dentry
1609 * @dentry: dentry to report
1610 * @vfsmnt: vfsmnt to which the dentry belongs
1611 * @root: root dentry
1612 * @rootmnt: vfsmnt to which the root dentry belongs
1613 * @buffer: buffer to return value in
1614 * @buflen: buffer length
1615 *
1616 * Convert a dentry into an ASCII path name. If the entry has been deleted
1617 * the string " (deleted)" is appended. Note that this is ambiguous.
1618 *
1619 * Returns the buffer or an error code if the path was too long.
1620 *
1621 * "buflen" should be positive. Caller holds the dcache_lock.
1622 */
1623char * __d_path( struct dentry *dentry, struct vfsmount *vfsmnt,
1624                        struct dentry *root, struct vfsmount *rootmnt,
1625                        char *buffer, int buflen)
1626{
1627        char * end = buffer+buflen;
1628        char * retval;
1629        int namelen;
1630
1631        *--end = '\0';
1632        buflen--;
1633        if (!IS_ROOT(dentry) && d_unhashed(dentry)) {
1634                buflen -= 10;
1635                end -= 10;
1636                if (buflen < 0)
1637                        goto Elong;
1638                memcpy(end, " (deleted)", 10);
1639        }
1640
1641        if (buflen < 1)
1642                goto Elong;
1643        /* Get '/' right */
1644        retval = end-1;
1645        *retval = '/';
1646
1647        for (;;) {
1648                struct dentry * parent;
1649
1650                if (dentry == root && vfsmnt == rootmnt)
1651                        break;
1652                if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
1653                        /* Global root? */
1654                        spin_lock(&vfsmount_lock);
1655                        if (vfsmnt->mnt_parent == vfsmnt) {
1656                                spin_unlock(&vfsmount_lock);
1657                                goto global_root;
1658                        }
1659                        dentry = vfsmnt->mnt_mountpoint;
1660                        vfsmnt = vfsmnt->mnt_parent;
1661                        spin_unlock(&vfsmount_lock);
1662                        continue;
1663                }
1664                parent = dentry->d_parent;
1665                prefetch(parent);
1666                namelen = dentry->d_name.len;
1667                buflen -= namelen + 1;
1668                if (buflen < 0)
1669                        goto Elong;
1670                end -= namelen;
1671                memcpy(end, dentry->d_name.name, namelen);
1672                *--end = '/';
1673                retval = end;
1674                dentry = parent;
1675        }
1676
1677        return retval;
1678
1679global_root:
1680        namelen = dentry->d_name.len;
1681        buflen -= namelen;
1682        if (buflen < 0)
1683                goto Elong;
1684        retval -= namelen-1;    /* hit the slash */
1685        memcpy(retval, dentry->d_name.name, namelen);
1686        return retval;
1687Elong:
1688        return ERR_PTR(-ENAMETOOLONG);
1689}
1690
1691EXPORT_SYMBOL_GPL(__d_path);
1692
1693/* write full pathname into buffer and return start of pathname */
1694char * d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
1695                                char *buf, int buflen)
1696{
1697        char *res;
1698        struct vfsmount *rootmnt;
1699        struct dentry *root;
1700
1701        read_lock(&current->fs->lock);
1702        rootmnt = mntget(current->fs->rootmnt);
1703        root = dget(current->fs->root);
1704        read_unlock(&current->fs->lock);
1705        spin_lock(&dcache_lock);
1706        res = __d_path(dentry, vfsmnt, root, rootmnt, buf, buflen);
1707        spin_unlock(&dcache_lock);
1708        dput(root);
1709        mntput(rootmnt);
1710        return res;
1711}
1712
1713/*
1714 * NOTE! The user-level library version returns a
1715 * character pointer. The kernel system call just
1716 * returns the length of the buffer filled (which
1717 * includes the ending '\0' character), or a negative
1718 * error value. So libc would do something like
1719 *
1720 *      char *getcwd(char * buf, size_t size)
1721 *      {
1722 *              int retval;
1723 *
1724 *              retval = sys_getcwd(buf, size);
1725 *              if (retval >= 0)
1726 *                      return buf;
1727 *              errno = -retval;
1728 *              return NULL;
1729 *      }
1730 */
1731asmlinkage long sys_getcwd(char __user *buf, unsigned long size)
1732{
1733        int error;
1734        struct vfsmount *pwdmnt, *rootmnt;
1735        struct dentry *pwd, *root;
1736        char *page = (char *) __get_free_page(GFP_USER);
1737
1738        if (!page)
1739                return -ENOMEM;
1740
1741        read_lock(&current->fs->lock);
1742        pwdmnt = mntget(current->fs->pwdmnt);
1743        pwd = dget(current->fs->pwd);
1744        rootmnt = mntget(current->fs->rootmnt);
1745        root = dget(current->fs->root);
1746        read_unlock(&current->fs->lock);
1747
1748        error = -ENOENT;
1749        /* Has the current directory has been unlinked? */
1750        spin_lock(&dcache_lock);
1751        if (pwd->d_parent == pwd || !d_unhashed(pwd)) {
1752                unsigned long len;
1753                char * cwd;
1754
1755                cwd = __d_path(pwd, pwdmnt, root, rootmnt, page, PAGE_SIZE);
1756                spin_unlock(&dcache_lock);
1757
1758                error = PTR_ERR(cwd);
1759                if (IS_ERR(cwd))
1760                        goto out;
1761
1762                error = -ERANGE;
1763                len = PAGE_SIZE + page - cwd;
1764                if (len <= size) {
1765                        error = len;
1766                        if (copy_to_user(buf, cwd, len))
1767                                error = -EFAULT;
1768                }
1769        } else
1770                spin_unlock(&dcache_lock);
1771
1772out:
1773        dput(pwd);
1774        mntput(pwdmnt);
1775        dput(root);
1776        mntput(rootmnt);
1777        free_page((unsigned long) page);
1778        return error;
1779}
1780
1781/*
1782 * Test whether new_dentry is a subdirectory of old_dentry.
1783 *
1784 * Trivially implemented using the dcache structure
1785 */
1786
1787/**
1788 * is_subdir - is new dentry a subdirectory of old_dentry
1789 * @new_dentry: new dentry
1790 * @old_dentry: old dentry
1791 *
1792 * Returns 1 if new_dentry is a subdirectory of the parent (at any depth).
1793 * Returns 0 otherwise.
1794 * Caller must ensure that "new_dentry" is pinned before calling is_subdir()
1795 */
1796  
1797int is_subdir(struct dentry * new_dentry, struct dentry * old_dentry)
1798{
1799        int result;
1800        struct dentry * saved = new_dentry;
1801        unsigned long seq;
1802
1803        result = 0;
1804        /* need rcu_readlock to protect against the d_parent trashing due to
1805         * d_move
1806         */
1807        rcu_read_lock();
1808        do {
1809                /* for restarting inner loop in case of seq retry */
1810                new_dentry = saved;
1811                seq = read_seqbegin(&rename_lock);
1812                for (;;) {
1813                        if (new_dentry != old_dentry) {
1814                                struct dentry * parent = new_dentry->d_parent;
1815                                if (parent == new_dentry)
1816                                        break;
1817                                new_dentry = parent;
1818                                continue;
1819                        }
1820                        result = 1;
1821                        break;
1822                }
1823        } while (read_seqretry(&rename_lock, seq));
1824        rcu_read_unlock();
1825
1826        return result;
1827}
1828
1829void d_genocide(struct dentry *root)
1830{
1831        struct dentry *this_parent = root;
1832        struct list_head *next;
1833
1834        spin_lock(&dcache_lock);
1835repeat:
1836        next = this_parent->d_subdirs.next;
1837resume:
1838        while (next != &this_parent->d_subdirs) {
1839                struct list_head *tmp = next;
1840                struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
1841                next = tmp->next;
1842                if (d_unhashed(dentry)||!dentry->d_inode)
1843                        continue;
1844                if (!list_empty(&dentry->d_subdirs)) {
1845                        this_parent = dentry;
1846                        goto repeat;
1847                }
1848                atomic_dec(&dentry->d_count);
1849        }
1850        if (this_parent != root) {
1851                next = this_parent->d_child.next; 
1852                atomic_dec(&this_parent->d_count);
1853                this_parent = this_parent->d_parent;
1854                goto resume;
1855        }
1856        spin_unlock(&dcache_lock);
1857}
1858
1859/**
1860 * find_inode_number - check for dentry with name
1861 * @dir: directory to check
1862 * @name: Name to find.
1863 *
1864 * Check whether a dentry already exists for the given name,
1865 * and return the inode number if it has an inode. Otherwise
1866 * 0 is returned.
1867 *
1868 * This routine is used to post-process directory listings for
1869 * filesystems using synthetic inode numbers, and is necessary
1870 * to keep getcwd() working.
1871 */
1872 
1873ino_t find_inode_number(struct dentry *dir, struct qstr *name)
1874{
1875        struct dentry * dentry;
1876        ino_t ino = 0;
1877
1878        /*
1879         * Check for a fs-specific hash function. Note that we must
1880         * calculate the standard hash first, as the d_op->d_hash()
1881         * routine may choose to leave the hash value unchanged.
1882         */
1883        name->hash = full_name_hash(name->name, name->len);
1884        if (dir->d_op && dir->d_op->d_hash)
1885        {
1886                if (dir->d_op->d_hash(dir, name) != 0)
1887                        goto out;
1888        }
1889
1890        dentry = d_lookup(dir, name);
1891        if (dentry)
1892        {
1893                if (dentry->d_inode)
1894                        ino = dentry->d_inode->i_ino;
1895                dput(dentry);
1896        }
1897out:
1898        return ino;
1899}
1900
1901static __initdata unsigned long dhash_entries;
1902static int __init set_dhash_entries(char *str)
1903{
1904        if (!str)
1905                return 0;
1906        dhash_entries = simple_strtoul(str, &str, 0);
1907        return 1;
1908}
1909__setup("dhash_entries=", set_dhash_entries);
1910
1911static void __init dcache_init_early(void)
1912{
1913        int loop;
1914
1915        dentry_hashtable =
1916                alloc_large_system_hash("Dentry cache",
1917                                        sizeof(struct hlist_head),
1918                                        dhash_entries,
1919                                        13,
1920                                        0,
1921                                        &d_hash_shift,
1922                                        &d_hash_mask);
1923
1924        for (loop = 0; loop < (1 << d_hash_shift); loop++)
1925                INIT_HLIST_HEAD(&dentry_hashtable[loop]);
1926}
1927
1928void flush_dentry_attributes (void)
1929{
1930        struct hlist_node *tmp;
1931        struct dentry *dentry;
1932        int i;
1933
1934        spin_lock(&dcache_lock);
1935        for (i = 0; i <= d_hash_mask; i++)
1936                hlist_for_each_entry(dentry, tmp, dentry_hashtable+i, d_hash) {
1937                        kfree(dentry->d_extra_attributes);
1938                        dentry->d_extra_attributes = NULL;
1939                }
1940        spin_unlock(&dcache_lock);
1941}
1942
1943EXPORT_SYMBOL_GPL(flush_dentry_attributes);
1944
1945static void __init dcache_init(unsigned long mempages)
1946{
1947        /* 
1948         * A constructor could be added for stable state like the lists,
1949         * but it is probably not worth it because of the cache nature
1950         * of the dcache. 
1951         */
1952        dentry_cache = kmem_cache_create("dentry_cache",
1953                                         sizeof(struct dentry),
1954                                         0,
1955                                         SLAB_RECLAIM_ACCOUNT|SLAB_PANIC,
1956                                         NULL, NULL);
1957        
1958        set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory);
1959}
1960
1961/* SLAB cache for __getname() consumers */
1962kmem_cache_t *names_cachep;
1963
1964/* SLAB cache for file structures */
1965kmem_cache_t *filp_cachep;
1966
1967EXPORT_SYMBOL(d_genocide);
1968
1969extern void bdev_cache_init(void);
1970extern void chrdev_init(void);
1971
1972void __init vfs_caches_init_early(void)
1973{
1974        dcache_init_early();
1975        inode_init_early();
1976}
1977
1978void __init vfs_caches_init(unsigned long mempages)
1979{
1980        unsigned long reserve;
1981
1982        /* Base hash sizes on available memory, with a reserve equal to
1983           150% of current kernel size */
1984
1985        reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
1986        mempages -= reserve;
1987
1988        names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
1989                        SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
1990
1991        filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
1992                        SLAB_HWCACHE_ALIGN|SLAB_PANIC, filp_ctor, filp_dtor);
1993
1994        dcache_init(mempages);
1995        inode_init(mempages);
1996        files_init(mempages);
1997        mnt_init(mempages);
1998        bdev_cache_init();
1999        chrdev_init();
2000}
2001
2002EXPORT_SYMBOL(d_alloc);
2003EXPORT_SYMBOL(d_alloc_anon);
2004EXPORT_SYMBOL(d_alloc_root);
2005EXPORT_SYMBOL(d_delete);
2006EXPORT_SYMBOL(d_find_alias);
2007EXPORT_SYMBOL(d_instantiate);
2008EXPORT_SYMBOL(d_invalidate);
2009EXPORT_SYMBOL(d_lookup);
2010EXPORT_SYMBOL(d_move);
2011EXPORT_SYMBOL_GPL(d_materialise_unique);
2012EXPORT_SYMBOL(d_instantiate_unique);
2013EXPORT_SYMBOL(d_path);
2014EXPORT_SYMBOL(d_prune_aliases);
2015EXPORT_SYMBOL(d_rehash);
2016EXPORT_SYMBOL(d_splice_alias);
2017EXPORT_SYMBOL(d_validate);
2018EXPORT_SYMBOL(dget_locked);
2019EXPORT_SYMBOL(dput);
2020EXPORT_SYMBOL(find_inode_number);
2021EXPORT_SYMBOL(have_submounts);
2022EXPORT_SYMBOL(names_cachep);
2023EXPORT_SYMBOL(shrink_dcache_parent);
2024EXPORT_SYMBOL(shrink_dcache_sb);
2025