RHEL4/mm/highmem.c
<<
>>
Prefs
   1/*
   2 * High memory handling common code and variables.
   3 *
   4 * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de
   5 *          Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de
   6 *
   7 *
   8 * Redesigned the x86 32-bit VM architecture to deal with
   9 * 64-bit physical space. With current x86 CPUs this
  10 * means up to 64 Gigabytes physical RAM.
  11 *
  12 * Rewrote high memory support to move the page cache into
  13 * high memory. Implemented permanent (schedulable) kmaps
  14 * based on Linus' idea.
  15 *
  16 * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
  17 */
  18
  19#include <linux/mm.h>
  20#include <linux/module.h>
  21#include <linux/swap.h>
  22#include <linux/bio.h>
  23#include <linux/pagemap.h>
  24#include <linux/mempool.h>
  25#include <linux/blkdev.h>
  26#include <linux/init.h>
  27#include <linux/hash.h>
  28#include <linux/highmem.h>
  29#include <asm/tlbflush.h>
  30
  31atomic_t bouncepages = ATOMIC_INIT(0);
  32
  33static mempool_t *page_pool, *isa_page_pool;
  34
  35static void *page_pool_alloc(int gfp_mask, void *data)
  36{
  37        int gfp = gfp_mask | (int) (long) data;
  38
  39        return alloc_page(gfp);
  40}
  41
  42static void page_pool_free(void *page, void *data)
  43{
  44        __free_page(page);
  45}
  46
/*
 * pkmap_count[] is not a pure "count".
 *  0 means that the slot is not mapped, and has not been mapped
 *    since a TLB flush - it is usable.
 *  1 means that there are no users, but it has been mapped
 *    since the last TLB flush - so we can't use it.
 *  n means that there are (n-1) current users of it.
 */
  55#ifdef CONFIG_HIGHMEM
  56static int pkmap_count[LAST_PKMAP];
  57static unsigned int last_pkmap_nr;
  58static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock);
  59
  60pte_t * pkmap_page_table;
  61
  62static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait);
  63
  64static void flush_all_zero_pkmaps(void)
  65{
  66        int i;
  67
  68        flush_cache_kmaps();
  69
  70        for (i = 0; i < LAST_PKMAP; i++) {
  71                struct page *page;
  72
  73                /*
  74                 * zero means we don't have anything to do,
  75                 * >1 means that it is still in use. Only
  76                 * a count of 1 means that it is free but
  77                 * needs to be unmapped
  78                 */
  79                if (pkmap_count[i] != 1)
  80                        continue;
  81                pkmap_count[i] = 0;
  82
  83                /* sanity check */
  84                if (pte_none(pkmap_page_table[i]))
  85                        BUG();
  86
  87                /*
  88                 * Don't need an atomic fetch-and-clear op here;
  89                 * no-one has the page mapped, and cannot get at
  90                 * its virtual address (and hence PTE) without first
  91                 * getting the kmap_lock (which is held here).
  92                 * So no dangers, even with speculative execution.
  93                 */
  94                page = pte_page(pkmap_page_table[i]);
  95                pte_clear(&pkmap_page_table[i]);
  96
  97                set_page_address(page, NULL);
  98        }
  99        flush_tlb_kernel_range(PKMAP_ADDR(0), PKMAP_ADDR(LAST_PKMAP));
 100}
 101
 102static inline unsigned long map_new_virtual(struct page *page)
 103{
 104        unsigned long vaddr;
 105        int count;
 106
 107start:
 108        count = LAST_PKMAP;
 109        /* Find an empty entry */
 110        for (;;) {
 111                last_pkmap_nr = (last_pkmap_nr + 1) & LAST_PKMAP_MASK;
 112                if (!last_pkmap_nr) {
 113                        flush_all_zero_pkmaps();
 114                        count = LAST_PKMAP;
 115                }
 116                if (!pkmap_count[last_pkmap_nr])
 117                        break;  /* Found a usable entry */
 118                if (--count)
 119                        continue;
 120
 121                /*
 122                 * Sleep for somebody else to unmap their entries
 123                 */
 124                {
 125                        DECLARE_WAITQUEUE(wait, current);
 126
 127                        __set_current_state(TASK_UNINTERRUPTIBLE);
 128                        add_wait_queue(&pkmap_map_wait, &wait);
 129                        spin_unlock(&kmap_lock);
 130                        schedule();
 131                        remove_wait_queue(&pkmap_map_wait, &wait);
 132                        spin_lock(&kmap_lock);
 133
 134                        /* Somebody else might have mapped it while we slept */
 135                        if (page_address(page))
 136                                return (unsigned long)page_address(page);
 137
 138                        /* Re-start */
 139                        goto start;
 140                }
 141        }
 142        vaddr = PKMAP_ADDR(last_pkmap_nr);
 143        set_pte(&(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot));
 144
 145        pkmap_count[last_pkmap_nr] = 1;
 146        set_page_address(page, (void *)vaddr);
 147
 148        return vaddr;
 149}
 150
 151#ifdef CONFIG_XEN
 152void kmap_flush_unused(void)
 153{
 154        spin_lock(&kmap_lock);
 155        flush_all_zero_pkmaps();
 156        spin_unlock(&kmap_lock);
 157}
 158
 159EXPORT_SYMBOL(kmap_flush_unused);
 160#endif
 161
 162void fastcall *kmap_high(struct page *page)
 163{
 164        unsigned long vaddr;
 165
 166        /*
 167         * For highmem pages, we can't trust "virtual" until
 168         * after we have the lock.
 169         *
 170         * We cannot call this from interrupts, as it may block
 171         */
 172        spin_lock(&kmap_lock);
 173        vaddr = (unsigned long)page_address(page);
 174        if (!vaddr)
 175                vaddr = map_new_virtual(page);
 176        pkmap_count[PKMAP_NR(vaddr)]++;
 177        if (pkmap_count[PKMAP_NR(vaddr)] < 2)
 178                BUG();
 179        spin_unlock(&kmap_lock);
 180        return (void*) vaddr;
 181}
 182
 183EXPORT_SYMBOL(kmap_high);
 184
 185void fastcall kunmap_high(struct page *page)
 186{
 187        unsigned long vaddr;
 188        unsigned long nr;
 189        int need_wakeup;
 190
 191        spin_lock(&kmap_lock);
 192        vaddr = (unsigned long)page_address(page);
 193        if (!vaddr)
 194                BUG();
 195        nr = PKMAP_NR(vaddr);
 196
 197        /*
 198         * A count must never go down to zero
 199         * without a TLB flush!
 200         */
 201        need_wakeup = 0;
 202        switch (--pkmap_count[nr]) {
 203        case 0:
 204                BUG();
 205        case 1:
 206                /*
 207                 * Avoid an unnecessary wake_up() function call.
 208                 * The common case is pkmap_count[] == 1, but
 209                 * no waiters.
 210                 * The tasks queued in the wait-queue are guarded
 211                 * by both the lock in the wait-queue-head and by
 212                 * the kmap_lock.  As the kmap_lock is held here,
 213                 * no need for the wait-queue-head's lock.  Simply
 214                 * test if the queue is empty.
 215                 */
 216                need_wakeup = waitqueue_active(&pkmap_map_wait);
 217        }
 218        spin_unlock(&kmap_lock);
 219
 220        /* do wake-up, if needed, race-free outside of the spin lock */
 221        if (need_wakeup)
 222                wake_up(&pkmap_map_wait);
 223}
 224
 225EXPORT_SYMBOL(kunmap_high);
 226
 227#define POOL_SIZE       64
 228
 229static __init int init_emergency_pool(void)
 230{
 231        struct sysinfo i;
 232        si_meminfo(&i);
 233        si_swapinfo(&i);
 234        
 235        if (!i.totalhigh)
 236                return 0;
 237
 238        page_pool = mempool_create(POOL_SIZE, page_pool_alloc, page_pool_free, NULL);
 239        if (!page_pool)
 240                BUG();
 241        printk("highmem bounce pool size: %d pages\n", POOL_SIZE);
 242
 243        return 0;
 244}
 245
 246__initcall(init_emergency_pool);
 247
 248/*
 249 * highmem version, map in to vec
 250 */
 251static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom)
 252{
 253        unsigned long flags;
 254        unsigned char *vto;
 255
 256        local_irq_save(flags);
 257        vto = kmap_atomic(to->bv_page, KM_BOUNCE_READ);
 258        memcpy(vto + to->bv_offset, vfrom, to->bv_len);
 259        kunmap_atomic(vto, KM_BOUNCE_READ);
 260        local_irq_restore(flags);
 261}
 262
 263#else /* CONFIG_HIGHMEM */
 264
 265#define bounce_copy_vec(to, vfrom)      \
 266        memcpy(page_address((to)->bv_page) + (to)->bv_offset, vfrom, (to)->bv_len)
 267
 268#endif
 269
 270#define ISA_POOL_SIZE   16
 271
 272/*
 273 * gets called "every" time someone init's a queue with BLK_BOUNCE_ISA
 274 * as the max address, so check if the pool has already been created.
 275 */
 276int init_emergency_isa_pool(void)
 277{
 278        if (isa_page_pool)
 279                return 0;
 280
 281        isa_page_pool = mempool_create(ISA_POOL_SIZE, page_pool_alloc, page_pool_free, (void *) __GFP_DMA);
 282        if (!isa_page_pool)
 283                BUG();
 284
 285        printk("isa bounce pool size: %d pages\n", ISA_POOL_SIZE);
 286        return 0;
 287}
 288
 289/*
 290 * Simple bounce buffer support for highmem pages. Depending on the
 291 * queue gfp mask set, *to may or may not be a highmem page. kmap it
 292 * always, it will do the Right Thing
 293 */
 294static void copy_to_high_bio_irq(struct bio *to, struct bio *from)
 295{
 296        unsigned char *vfrom;
 297        struct bio_vec *tovec, *fromvec;
 298        int i;
 299
 300        __bio_for_each_segment(tovec, to, i, 0) {
 301                fromvec = from->bi_io_vec + i;
 302
 303                /*
 304                 * not bounced
 305                 */
 306                if (tovec->bv_page == fromvec->bv_page)
 307                        continue;
 308
 309                /*
 310                 * fromvec->bv_offset and fromvec->bv_len might have been
 311                 * modified by the block layer, so use the original copy,
 312                 * bounce_copy_vec already uses tovec->bv_len
 313                 */
 314                vfrom = page_address(fromvec->bv_page) + tovec->bv_offset;
 315
 316                flush_dcache_page(tovec->bv_page);
 317                bounce_copy_vec(tovec, vfrom);
 318        }
 319}
 320
 321static void bounce_end_io(struct bio *bio, mempool_t *pool, int err)
 322{
 323        struct bio *bio_orig = bio->bi_private;
 324        struct bio_vec *bvec, *org_vec;
 325        int i;
 326
 327        if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags))
 328                set_bit(BIO_EOPNOTSUPP, &bio_orig->bi_flags);
 329
 330        /*
 331         * free up bounce indirect pages used
 332         */
 333        __bio_for_each_segment(bvec, bio, i, 0) {
 334                org_vec = bio_orig->bi_io_vec + i;
 335                if (bvec->bv_page == org_vec->bv_page)
 336                        continue;
 337
 338                mempool_free(bvec->bv_page, pool);      
 339                atomic_dec(&bouncepages);
 340        }
 341
 342        bio_endio(bio_orig, bio_orig->bi_size, err);
 343        bio_put(bio);
 344}
 345
 346static int bounce_end_io_write(struct bio *bio, unsigned int bytes_done,int err)
 347{
 348        if (bio->bi_size)
 349                return 1;
 350
 351        bounce_end_io(bio, page_pool, err);
 352        return 0;
 353}
 354
 355static int bounce_end_io_write_isa(struct bio *bio, unsigned int bytes_done, int err)
 356{
 357        if (bio->bi_size)
 358                return 1;
 359
 360        bounce_end_io(bio, isa_page_pool, err);
 361        return 0;
 362}
 363
 364static void __bounce_end_io_read(struct bio *bio, mempool_t *pool, int err)
 365{
 366        struct bio *bio_orig = bio->bi_private;
 367
 368        if (test_bit(BIO_UPTODATE, &bio->bi_flags))
 369                copy_to_high_bio_irq(bio_orig, bio);
 370
 371        bounce_end_io(bio, pool, err);
 372}
 373
 374static int bounce_end_io_read(struct bio *bio, unsigned int bytes_done, int err)
 375{
 376        if (bio->bi_size)
 377                return 1;
 378
 379        __bounce_end_io_read(bio, page_pool, err);
 380        return 0;
 381}
 382
 383static int bounce_end_io_read_isa(struct bio *bio, unsigned int bytes_done, int err)
 384{
 385        if (bio->bi_size)
 386                return 1;
 387
 388        __bounce_end_io_read(bio, isa_page_pool, err);
 389        return 0;
 390}
 391
 392static void __blk_queue_bounce(request_queue_t *q, struct bio **bio_orig,
 393                        mempool_t *pool)
 394{
 395        struct page *page;
 396        struct bio *bio = NULL;
 397        int i, rw = bio_data_dir(*bio_orig);
 398        struct bio_vec *to, *from;
 399
 400        bio_for_each_segment(from, *bio_orig, i) {
 401                page = from->bv_page;
 402
 403                /*
 404                 * is destination page below bounce pfn?
 405                 */
 406                if (page_to_pfn(page) < q->bounce_pfn)
 407                        continue;
 408
 409                /*
 410                 * irk, bounce it
 411                 */
 412                if (!bio)
 413                        bio = bio_alloc(GFP_NOIO, (*bio_orig)->bi_vcnt);
 414
 415                to = bio->bi_io_vec + i;
 416
 417                to->bv_page = mempool_alloc(pool, q->bounce_gfp);
 418                to->bv_len = from->bv_len;
 419                to->bv_offset = from->bv_offset;
 420                atomic_inc(&bouncepages);
 421
 422                if (rw == WRITE) {
 423                        char *vto, *vfrom;
 424
 425                        flush_dcache_page(from->bv_page);
 426                        vto = page_address(to->bv_page) + to->bv_offset;
 427                        vfrom = kmap(from->bv_page) + from->bv_offset;
 428                        memcpy(vto, vfrom, to->bv_len);
 429                        kunmap(from->bv_page);
 430                }
 431        }
 432
 433        /*
 434         * no pages bounced
 435         */
 436        if (!bio)
 437                return;
 438
 439        /*
 440         * at least one page was bounced, fill in possible non-highmem
 441         * pages
 442         */
 443        __bio_for_each_segment(from, *bio_orig, i, 0) {
 444                to = bio_iovec_idx(bio, i);
 445                if (!to->bv_page) {
 446                        to->bv_page = from->bv_page;
 447                        to->bv_len = from->bv_len;
 448                        to->bv_offset = from->bv_offset;
 449                }
 450        }
 451
 452        bio->bi_bdev = (*bio_orig)->bi_bdev;
 453        bio->bi_flags |= (1 << BIO_BOUNCED);
 454        bio->bi_sector = (*bio_orig)->bi_sector;
 455        bio->bi_rw = (*bio_orig)->bi_rw;
 456
 457        bio->bi_vcnt = (*bio_orig)->bi_vcnt;
 458        bio->bi_idx = (*bio_orig)->bi_idx;
 459        bio->bi_size = (*bio_orig)->bi_size;
 460
 461        if (pool == page_pool) {
 462                bio->bi_end_io = bounce_end_io_write;
 463                if (rw == READ)
 464                        bio->bi_end_io = bounce_end_io_read;
 465        } else {
 466                bio->bi_end_io = bounce_end_io_write_isa;
 467                if (rw == READ)
 468                        bio->bi_end_io = bounce_end_io_read_isa;
 469        }
 470
 471        bio->bi_private = *bio_orig;
 472        *bio_orig = bio;
 473}
 474
 475void blk_queue_bounce(request_queue_t *q, struct bio **bio_orig)
 476{
 477        mempool_t *pool;
 478
 479        /*
 480         * for non-isa bounce case, just check if the bounce pfn is equal
 481         * to or bigger than the highest pfn in the system -- in that case,
 482         * don't waste time iterating over bio segments
 483         */
 484        if (!(q->bounce_gfp & GFP_DMA)) {
 485                if (q->bounce_pfn >= blk_max_pfn)
 486                        return;
 487                pool = page_pool;
 488        } else {
 489                BUG_ON(!isa_page_pool);
 490                pool = isa_page_pool;
 491        }
 492
 493        /*
 494         * slow path
 495         */
 496        __blk_queue_bounce(q, bio_orig, pool);
 497}
 498
 499EXPORT_SYMBOL(blk_queue_bounce);
 500
 501#if defined(HASHED_PAGE_VIRTUAL)
 502
 503#define PA_HASH_ORDER   7
 504
 505/*
 506 * Describes one page->virtual association
 507 */
 508struct page_address_map {
 509        struct page *page;
 510        void *virtual;
 511        struct list_head list;
 512};
 513
 514/*
 515 * page_address_map freelist, allocated from page_address_maps.
 516 */
 517static struct list_head page_address_pool;      /* freelist */
 518static spinlock_t pool_lock;                    /* protects page_address_pool */
 519
 520/*
 521 * Hash table bucket
 522 */
 523static struct page_address_slot {
 524        struct list_head lh;                    /* List of page_address_maps */
 525        spinlock_t lock;                        /* Protect this bucket's list */
 526} ____cacheline_aligned_in_smp page_address_htable[1<<PA_HASH_ORDER];
 527
 528static struct page_address_slot *page_slot(struct page *page)
 529{
 530        return &page_address_htable[hash_ptr(page, PA_HASH_ORDER)];
 531}
 532
 533void *page_address(struct page *page)
 534{
 535        unsigned long flags;
 536        void *ret;
 537        struct page_address_slot *pas;
 538
 539        if (!PageHighMem(page))
 540                return lowmem_page_address(page);
 541
 542        pas = page_slot(page);
 543        ret = NULL;
 544        spin_lock_irqsave(&pas->lock, flags);
 545        if (!list_empty(&pas->lh)) {
 546                struct page_address_map *pam;
 547
 548                list_for_each_entry(pam, &pas->lh, list) {
 549                        if (pam->page == page) {
 550                                ret = pam->virtual;
 551                                goto done;
 552                        }
 553                }
 554        }
 555done:
 556        spin_unlock_irqrestore(&pas->lock, flags);
 557        return ret;
 558}
 559
 560EXPORT_SYMBOL(page_address);
 561
 562void set_page_address(struct page *page, void *virtual)
 563{
 564        unsigned long flags;
 565        struct page_address_slot *pas;
 566        struct page_address_map *pam;
 567
 568        BUG_ON(!PageHighMem(page));
 569
 570        pas = page_slot(page);
 571        if (virtual) {          /* Add */
 572                BUG_ON(list_empty(&page_address_pool));
 573
 574                spin_lock_irqsave(&pool_lock, flags);
 575                pam = list_entry(page_address_pool.next,
 576                                struct page_address_map, list);
 577                list_del(&pam->list);
 578                spin_unlock_irqrestore(&pool_lock, flags);
 579
 580                pam->page = page;
 581                pam->virtual = virtual;
 582
 583                spin_lock_irqsave(&pas->lock, flags);
 584                list_add_tail(&pam->list, &pas->lh);
 585                spin_unlock_irqrestore(&pas->lock, flags);
 586        } else {                /* Remove */
 587                spin_lock_irqsave(&pas->lock, flags);
 588                list_for_each_entry(pam, &pas->lh, list) {
 589                        if (pam->page == page) {
 590                                list_del(&pam->list);
 591                                spin_unlock_irqrestore(&pas->lock, flags);
 592                                spin_lock_irqsave(&pool_lock, flags);
 593                                list_add_tail(&pam->list, &page_address_pool);
 594                                spin_unlock_irqrestore(&pool_lock, flags);
 595                                goto done;
 596                        }
 597                }
 598                spin_unlock_irqrestore(&pas->lock, flags);
 599        }
 600done:
 601        return;
 602}
 603
 604static struct page_address_map page_address_maps[LAST_PKMAP];
 605
 606void __init page_address_init(void)
 607{
 608        int i;
 609
 610        INIT_LIST_HEAD(&page_address_pool);
 611        for (i = 0; i < ARRAY_SIZE(page_address_maps); i++)
 612                list_add(&page_address_maps[i].list, &page_address_pool);
 613        for (i = 0; i < ARRAY_SIZE(page_address_htable); i++) {
 614                INIT_LIST_HEAD(&page_address_htable[i].lh);
 615                spin_lock_init(&page_address_htable[i].lock);
 616        }
 617        spin_lock_init(&pool_lock);
 618}
 619
#endif  /* defined(HASHED_PAGE_VIRTUAL) */
 621