RHEL4/mm/vmalloc.c
<<
>>
Prefs
   1/*
   2 *  linux/mm/vmalloc.c
   3 *
   4 *  Copyright (C) 1993  Linus Torvalds
   5 *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
   6 *  SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian <tigran@veritas.com>, May 2000
   7 *  Major rework to support vmap/vunmap, Christoph Hellwig, SGI, August 2002
   8 */
   9
  10#include <linux/mm.h>
  11#include <linux/module.h>
  12#include <linux/highmem.h>
  13#include <linux/slab.h>
  14#include <linux/spinlock.h>
  15#include <linux/interrupt.h>
  16
  17#include <linux/vmalloc.h>
  18
  19#include <asm/uaccess.h>
  20#include <asm/tlbflush.h>
  21
  22
  23rwlock_t vmlist_lock = RW_LOCK_UNLOCKED;	/* write-locked while vmlist is searched/modified for insert or removal, read-locked by vread()/vwrite() */
  24struct vm_struct *vmlist;	/* head of the list of reserved kernel virtual areas, chained through ->next */
  25
  26static void unmap_area_pte(pmd_t *pmd, unsigned long address,
  27                                  unsigned long size)
  28{
  29        unsigned long end;
  30        pte_t *pte;
  31
  32        if (pmd_none(*pmd))
  33                return;
  34        if (pmd_bad(*pmd)) {
  35                pmd_ERROR(*pmd);
  36                pmd_clear(pmd);
  37                return;
  38        }
  39
  40        pte = pte_offset_kernel(pmd, address);
  41        address &= ~PMD_MASK;
  42        end = address + size;
  43        if (end > PMD_SIZE)
  44                end = PMD_SIZE;
  45
  46        do {
  47                pte_t page;
  48                page = ptep_get_and_clear(pte);
  49                address += PAGE_SIZE;
  50                pte++;
  51                if (pte_none(page))
  52                        continue;
  53                if (pte_present(page))
  54                        continue;
  55                printk(KERN_CRIT "Whee.. Swapped out page in kernel page table\n");
  56        } while (address < end);
  57}
  58
  59static void unmap_area_pmd(pgd_t *dir, unsigned long address,
  60                                  unsigned long size)
  61{
  62        unsigned long end;
  63        pmd_t *pmd;
  64
  65        if (pgd_none(*dir))
  66                return;
  67        if (pgd_bad(*dir)) {
  68                pgd_ERROR(*dir);
  69                pgd_clear(dir);
  70                return;
  71        }
  72
  73        pmd = pmd_offset(dir, address);
  74        address &= ~PGDIR_MASK;
  75        end = address + size;
  76        if (end > PGDIR_SIZE)
  77                end = PGDIR_SIZE;
  78
  79        do {
  80                unmap_area_pte(pmd, address, end - address);
  81                address = (address + PMD_SIZE) & PMD_MASK;
  82                pmd++;
  83        } while (address < end);
  84}
  85
  86static int map_area_pte(pte_t *pte, unsigned long address,
  87                               unsigned long size, pgprot_t prot,
  88                               struct page ***pages)
  89{
  90        unsigned long end;
  91
  92        address &= ~PMD_MASK;
  93        end = address + size;
  94        if (end > PMD_SIZE)
  95                end = PMD_SIZE;
  96
  97        do {
  98                struct page *page = **pages;
  99
 100                WARN_ON(!pte_none(*pte));
 101                if (!page)
 102                        return -ENOMEM;
 103
 104                set_pte(pte, mk_pte(page, prot));
 105                address += PAGE_SIZE;
 106                pte++;
 107                (*pages)++;
 108        } while (address < end);
 109        return 0;
 110}
 111
 112static int map_area_pmd(pmd_t *pmd, unsigned long address,
 113                               unsigned long size, pgprot_t prot,
 114                               struct page ***pages)
 115{
 116        unsigned long base, end;
 117
 118        base = address & PGDIR_MASK;
 119        address &= ~PGDIR_MASK;
 120        end = address + size;
 121        if (end > PGDIR_SIZE)
 122                end = PGDIR_SIZE;
 123
 124        do {
 125                pte_t * pte = pte_alloc_kernel(&init_mm, pmd, base + address);
 126                if (!pte)
 127                        return -ENOMEM;
 128                if (map_area_pte(pte, address, end - address, prot, pages))
 129                        return -ENOMEM;
 130                address = (address + PMD_SIZE) & PMD_MASK;
 131                pmd++;
 132        } while (address < end);
 133
 134        return 0;
 135}
 136
 137void unmap_vm_area(struct vm_struct *area)
 138{
 139        unsigned long address = (unsigned long) area->addr;
 140        unsigned long end = (address + area->size);
 141        pgd_t *dir;
 142
 143        dir = pgd_offset_k(address);
 144        flush_cache_vunmap(address, end);
 145        do {
 146                unmap_area_pmd(dir, address, end - address);
 147                address = (address + PGDIR_SIZE) & PGDIR_MASK;
 148                dir++;
 149        } while (address && (address < end));
 150        flush_tlb_kernel_range((unsigned long) area->addr, end);
 151}
 152
 153int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
 154{
 155        unsigned long address = (unsigned long) area->addr;
 156        unsigned long end = address + (area->size-PAGE_SIZE);
 157        pgd_t *dir;
 158        int err = 0;
 159
 160        dir = pgd_offset_k(address);
 161        spin_lock(&init_mm.page_table_lock);
 162        do {
 163                pmd_t *pmd = pmd_alloc(&init_mm, dir, address);
 164                if (!pmd) {
 165                        err = -ENOMEM;
 166                        break;
 167                }
 168                if (map_area_pmd(pmd, address, end - address, prot, pages)) {
 169                        err = -ENOMEM;
 170                        break;
 171                }
 172
 173                address = (address + PGDIR_SIZE) & PGDIR_MASK;
 174                dir++;
 175        } while (address && (address < end));
 176
 177        spin_unlock(&init_mm.page_table_lock);
 178        flush_cache_vmap((unsigned long) area->addr, end);
 179        return err;
 180}
 181
 182#define IOREMAP_MAX_ORDER       (7 + PAGE_SHIFT)        /* 128 pages */
 183
 184struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
 185                                unsigned long start, unsigned long end)
 186{
 187        struct vm_struct **p, *tmp, *area;
 188        unsigned long align = 1;
 189        unsigned long addr;
 190
 191        if (flags & VM_IOREMAP) {
 192                int bit = fls(size);
 193
 194                if (bit > IOREMAP_MAX_ORDER)
 195                        bit = IOREMAP_MAX_ORDER;
 196                else if (bit < PAGE_SHIFT)
 197                        bit = PAGE_SHIFT;
 198
 199                align = 1ul << bit;
 200        }
 201        addr = ALIGN(start, align);
 202
 203        area = kmalloc(sizeof(*area), GFP_KERNEL);
 204        if (unlikely(!area))
 205                return NULL;
 206
 207        /*
 208         * We always allocate a guard page.
 209         */
 210        size += PAGE_SIZE;
 211        if (unlikely(!size)) {
 212                kfree (area);
 213                return NULL;
 214        }
 215
 216        write_lock(&vmlist_lock);
 217        for (p = &vmlist; (tmp = *p) != NULL ;p = &tmp->next) {
 218                if ((unsigned long)tmp->addr < addr) {
 219                        if((unsigned long)tmp->addr + tmp->size >= addr)
 220                                addr = ALIGN(tmp->size + 
 221                                             (unsigned long)tmp->addr, align);
 222                        continue;
 223                }
 224                if ((size + addr) < addr)
 225                        goto out;
 226                if (size + addr <= (unsigned long)tmp->addr)
 227                        goto found;
 228                addr = ALIGN(tmp->size + (unsigned long)tmp->addr, align);
 229                if (addr > end - size)
 230                        goto out;
 231        }
 232
 233found:
 234        area->next = *p;
 235        *p = area;
 236
 237        area->flags = flags;
 238        area->addr = (void *)addr;
 239        area->size = size;
 240        area->pages = NULL;
 241        area->nr_pages = 0;
 242        area->phys_addr = 0;
 243        write_unlock(&vmlist_lock);
 244
 245        return area;
 246
 247out:
 248        write_unlock(&vmlist_lock);
 249        kfree(area);
 250        if (printk_ratelimit())
 251                printk(KERN_WARNING "allocation failed: out of vmalloc space - use vmalloc=<size> to increase size.\n");
 252        return NULL;
 253}
 254
 255/**
 256 *      get_vm_area  -  reserve a contingous kernel virtual area
 257 *
 258 *      @size:          size of the area
 259 *      @flags:         %VM_IOREMAP for I/O mappings or VM_ALLOC
 260 *
 261 *      Search an area of @size in the kernel virtual mapping area,
 262 *      and reserved it for out purposes.  Returns the area descriptor
 263 *      on success or %NULL on failure.
 264 */
 265struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
 266{
 267        return __get_vm_area(size, flags, VMALLOC_START, VMALLOC_END);
 268}
 269
 270/**
 271 *      remove_vm_area  -  find and remove a contingous kernel virtual area
 272 *
 273 *      @addr:          base address
 274 *
 275 *      Search for the kernel VM area starting at @addr, and remove it.
 276 *      This function returns the found VM area, but using it is NOT safe
 277 *      on SMP machines.
 278 */
 279struct vm_struct *remove_vm_area(void *addr)
 280{
 281        struct vm_struct **p, *tmp;
 282
 283        write_lock(&vmlist_lock);
 284        for (p = &vmlist ; (tmp = *p) != NULL ;p = &tmp->next) {
 285                 if (tmp->addr == addr)
 286                         goto found;
 287        }
 288        write_unlock(&vmlist_lock);
 289        return NULL;
 290
 291found:
 292        unmap_vm_area(tmp);
 293        *p = tmp->next;
 294        write_unlock(&vmlist_lock);
 295        return tmp;
 296}
 297
 298void __vunmap(void *addr, int deallocate_pages)
 299{
 300        struct vm_struct *area;
 301
 302        if (!addr)
 303                return;
 304
 305        if ((PAGE_SIZE-1) & (unsigned long)addr) {
 306                printk(KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
 307                WARN_ON(1);
 308                return;
 309        }
 310
 311        area = remove_vm_area(addr);
 312        if (unlikely(!area)) {
 313                printk(KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
 314                                addr);
 315                WARN_ON(1);
 316                return;
 317        }
 318        
 319        if (deallocate_pages) {
 320                int i;
 321
 322                for (i = 0; i < area->nr_pages; i++) {
 323                        if (unlikely(!area->pages[i]))
 324                                BUG();
 325                        __free_page(area->pages[i]);
 326                }
 327
 328                if (area->nr_pages > PAGE_SIZE/sizeof(struct page *))
 329                        vfree(area->pages);
 330                else
 331                        kfree(area->pages);
 332        }
 333
 334        kfree(area);
 335        return;
 336}
 337
 338/**
 339 *      vfree  -  release memory allocated by vmalloc()
 340 *
 341 *      @addr:          memory base address
 342 *
 343 *      Free the virtually contiguous memory area starting at @addr, as
 344 *      obtained from vmalloc(), vmalloc_32() or __vmalloc().
 345 *
 346 *      May not be called in interrupt context.
 347 */
 348void vfree(void *addr)
 349{
 350        BUG_ON(in_interrupt());
 351        __vunmap(addr, 1);
 352}
 353
 354EXPORT_SYMBOL(vfree);
 355
 356/**
 357 *      vunmap  -  release virtual mapping obtained by vmap()
 358 *
 359 *      @addr:          memory base address
 360 *
 361 *      Free the virtually contiguous memory area starting at @addr,
 362 *      which was created from the page array passed to vmap().
 363 *
 364 *      May not be called in interrupt context.
 365 */
 366void vunmap(void *addr)
 367{
 368        BUG_ON(in_interrupt());
 369        __vunmap(addr, 0);
 370}
 371
 372EXPORT_SYMBOL(vunmap);
 373
 374/**
 375 *      vmap  -  map an array of pages into virtually contiguous space
 376 *
 377 *      @pages:         array of page pointers
 378 *      @count:         number of pages to map
 379 *      @flags:         vm_area->flags
 380 *      @prot:          page protection for the mapping
 381 *
 382 *      Maps @count pages from @pages into contiguous kernel virtual
 383 *      space.
 384 */
 385void *vmap(struct page **pages, unsigned int count,
 386                unsigned long flags, pgprot_t prot)
 387{
 388        struct vm_struct *area;
 389
 390        if (count > num_physpages)
 391                return NULL;
 392
 393        area = get_vm_area((count << PAGE_SHIFT), flags);
 394        if (!area)
 395                return NULL;
 396        if (map_vm_area(area, prot, &pages)) {
 397                vunmap(area->addr);
 398                return NULL;
 399        }
 400
 401        return area->addr;
 402}
 403
 404EXPORT_SYMBOL(vmap);
 405
 406/**
 407 *      __vmalloc  -  allocate virtually contiguous memory
 408 *
 409 *      @size:          allocation size
 410 *      @gfp_mask:      flags for the page level allocator
 411 *      @prot:          protection mask for the allocated pages
 412 *
 413 *      Allocate enough pages to cover @size from the page level
 414 *      allocator with @gfp_mask flags.  Map them into contiguous
 415 *      kernel virtual space, using a pagetable protection of @prot.
 416 */
 417void *__vmalloc(unsigned long size, int gfp_mask, pgprot_t prot)
 418{
 419        struct vm_struct *area;
 420        struct page **pages;
 421        unsigned int nr_pages, array_size, i;
 422
 423        size = PAGE_ALIGN(size);
 424        if (!size || (size >> PAGE_SHIFT) > num_physpages)
 425                return NULL;
 426
 427        area = get_vm_area(size, VM_ALLOC);
 428        if (!area)
 429                return NULL;
 430
 431        nr_pages = size >> PAGE_SHIFT;
 432        array_size = (nr_pages * sizeof(struct page *));
 433
 434        area->nr_pages = nr_pages;
 435        if (array_size > PAGE_SIZE)
 436                pages = __vmalloc(array_size, gfp_mask, PAGE_KERNEL);
 437        else
 438                pages = kmalloc(array_size, (gfp_mask & ~__GFP_HIGHMEM));
 439        area->pages = pages;
 440        if (!area->pages) {
 441                remove_vm_area(area->addr);
 442                kfree(area);
 443                return NULL;
 444        }
 445        memset(area->pages, 0, array_size);
 446
 447        for (i = 0; i < area->nr_pages; i++) {
 448                area->pages[i] = alloc_page(gfp_mask);
 449                if (unlikely(!area->pages[i])) {
 450                        /* Successfully allocated i pages, free them in __vunmap() */
 451                        area->nr_pages = i;
 452                        goto fail;
 453                }
 454        }
 455        
 456        if (map_vm_area(area, prot, &pages))
 457                goto fail;
 458        return area->addr;
 459
 460fail:
 461        vfree(area->addr);
 462        return NULL;
 463}
 464
 465EXPORT_SYMBOL(__vmalloc);
 466
 467/**
 468 *      vmalloc  -  allocate virtually contiguous memory
 469 *
 470 *      @size:          allocation size
 471 *
 472 *      Allocate enough pages to cover @size from the page level
 473 *      allocator and map them into contiguous kernel virtual space.
 474 *
 475 *      For tight cotrol over page level allocator and protection flags
 476 *      use __vmalloc() instead.
 477 */
 478void *vmalloc(unsigned long size)
 479{
 480       return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL);
 481}
 482
 483EXPORT_SYMBOL(vmalloc);
 484
 485/**
 486 *      vmalloc_exec  -  allocate virtually contiguous, executable memory
 487 *
 488 *      @size:          allocation size
 489 *
 490 *      Kernel-internal function to allocate enough pages to cover @size
 491 *      the page level allocator and map them into contiguous and
 492 *      executable kernel virtual space.
 493 *
 494 *      For tight cotrol over page level allocator and protection flags
 495 *      use __vmalloc() instead.
 496 */
 497
 498#ifndef PAGE_KERNEL_EXEC
 499# define PAGE_KERNEL_EXEC PAGE_KERNEL
 500#endif
 501
 502void *vmalloc_exec(unsigned long size)
 503{
 504        return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC);
 505}
 506
 507/**
 508 *      vmalloc_32  -  allocate virtually contiguous memory (32bit addressable)
 509 *
 510 *      @size:          allocation size
 511 *
 512 *      Allocate enough 32bit PA addressable pages to cover @size from the
 513 *      page level allocator and map them into contiguous kernel virtual space.
 514 */
 515void *vmalloc_32(unsigned long size)
 516{
 517        return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL);
 518}
 519
 520EXPORT_SYMBOL(vmalloc_32);
 521
 522long vread(char *buf, char *addr, unsigned long count)
 523{
 524        struct vm_struct *tmp;
 525        char *vaddr, *buf_start = buf;
 526        unsigned long n;
 527
 528        /* Don't allow overflow */
 529        if ((unsigned long) addr + count < count)
 530                count = -(unsigned long) addr;
 531
 532        read_lock(&vmlist_lock);
 533        for (tmp = vmlist; tmp; tmp = tmp->next) {
 534                vaddr = (char *) tmp->addr;
 535                if (addr >= vaddr + tmp->size - PAGE_SIZE)
 536                        continue;
 537                while (addr < vaddr) {
 538                        if (count == 0)
 539                                goto finished;
 540                        *buf = '\0';
 541                        buf++;
 542                        addr++;
 543                        count--;
 544                }
 545                n = vaddr + tmp->size - PAGE_SIZE - addr;
 546                do {
 547                        if (count == 0)
 548                                goto finished;
 549                        *buf = *addr;
 550                        buf++;
 551                        addr++;
 552                        count--;
 553                } while (--n > 0);
 554        }
 555finished:
 556        read_unlock(&vmlist_lock);
 557        return buf - buf_start;
 558}
 559
 560long vwrite(char *buf, char *addr, unsigned long count)
 561{
 562        struct vm_struct *tmp;
 563        char *vaddr, *buf_start = buf;
 564        unsigned long n;
 565
 566        /* Don't allow overflow */
 567        if ((unsigned long) addr + count < count)
 568                count = -(unsigned long) addr;
 569
 570        read_lock(&vmlist_lock);
 571        for (tmp = vmlist; tmp; tmp = tmp->next) {
 572                vaddr = (char *) tmp->addr;
 573                if (addr >= vaddr + tmp->size - PAGE_SIZE)
 574                        continue;
 575                while (addr < vaddr) {
 576                        if (count == 0)
 577                                goto finished;
 578                        buf++;
 579                        addr++;
 580                        count--;
 581                }
 582                n = vaddr + tmp->size - PAGE_SIZE - addr;
 583                do {
 584                        if (count == 0)
 585                                goto finished;
 586                        *addr = *buf;
 587                        buf++;
 588                        addr++;
 589                        count--;
 590                } while (--n > 0);
 591        }
 592finished:
 593        read_unlock(&vmlist_lock);
 594        return buf - buf_start;
 595}
 596