RHEL4/mm/hugetlb.c
<<
>>
Prefs
   1/*
   2 * Generic hugetlb support.
   3 * (C) William Irwin, April 2004
   4 */
   5#include <linux/gfp.h>
   6#include <linux/list.h>
   7#include <linux/init.h>
   8#include <linux/module.h>
   9#include <linux/mm.h>
  10#include <linux/hugetlb.h>
  11#include <linux/sysctl.h>
  12#include <linux/highmem.h>
  13
  14const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
  15static unsigned long nr_huge_pages, free_huge_pages;
  16unsigned long max_huge_pages;
  17static struct list_head hugepage_freelists[MAX_NUMNODES];
  18static unsigned int nr_huge_pages_node[MAX_NUMNODES];
  19static unsigned int free_huge_pages_node[MAX_NUMNODES];
  20
  21/*
  22 * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages
  23 */
  24static spinlock_t hugetlb_lock = SPIN_LOCK_UNLOCKED;
  25
  26static void enqueue_huge_page(struct page *page)
  27{
  28        int nid = page_to_nid(page);
  29        list_add(&page->lru, &hugepage_freelists[nid]);
  30        free_huge_pages++;
  31        free_huge_pages_node[nid]++;
  32}
  33
  34static struct page *dequeue_huge_page(void)
  35{
  36        int nid = numa_node_id();
  37        struct page *page = NULL;
  38
  39        if (list_empty(&hugepage_freelists[nid])) {
  40                for (nid = 0; nid < MAX_NUMNODES; ++nid)
  41                        if (!list_empty(&hugepage_freelists[nid]))
  42                                break;
  43        }
  44        if (nid >= 0 && nid < MAX_NUMNODES &&
  45            !list_empty(&hugepage_freelists[nid])) {
  46                page = list_entry(hugepage_freelists[nid].next,
  47                                  struct page, lru);
  48                list_del(&page->lru);
  49                free_huge_pages--;
  50                free_huge_pages_node[nid]--;
  51        }
  52        return page;
  53}
  54
  55static struct page *alloc_fresh_huge_page(void)
  56{
  57        static int nid = 0;
  58        struct page *page;
  59        page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP|__GFP_NOWARN,
  60                                        HUGETLB_PAGE_ORDER);
  61        nid = (nid + 1) % numnodes;
  62        if (page) {
  63                spin_lock(&hugetlb_lock);
  64                nr_huge_pages++;
  65                nr_huge_pages_node[page_zone(page)->zone_pgdat->node_id]++;
  66                spin_unlock(&hugetlb_lock);
  67        }
  68        return page;
  69}
  70
  71void free_huge_page(struct page *page)
  72{
  73        BUG_ON(page_count(page));
  74
  75        INIT_LIST_HEAD(&page->lru);
  76        page[1].mapping = NULL;
  77
  78        spin_lock(&hugetlb_lock);
  79        enqueue_huge_page(page);
  80        spin_unlock(&hugetlb_lock);
  81}
  82
  83struct page *alloc_huge_page(void)
  84{
  85        struct page *page;
  86        int i;
  87
  88        spin_lock(&hugetlb_lock);
  89        page = dequeue_huge_page();
  90        if (!page) {
  91                spin_unlock(&hugetlb_lock);
  92                return NULL;
  93        }
  94        spin_unlock(&hugetlb_lock);
  95        set_page_count(page, 1);
  96        page[1].mapping = (void *)free_huge_page;
  97        for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); ++i)
  98                clear_highpage(&page[i]);
  99        return page;
 100}
 101
 102static int __init hugetlb_init(void)
 103{
 104        unsigned long i;
 105        struct page *page;
 106
 107        for (i = 0; i < MAX_NUMNODES; ++i)
 108                INIT_LIST_HEAD(&hugepage_freelists[i]);
 109
 110        for (i = 0; i < max_huge_pages; ++i) {
 111                page = alloc_fresh_huge_page();
 112                if (!page)
 113                        break;
 114                spin_lock(&hugetlb_lock);
 115                enqueue_huge_page(page);
 116                spin_unlock(&hugetlb_lock);
 117        }
 118        max_huge_pages = free_huge_pages = nr_huge_pages = i;
 119        printk("Total HugeTLB memory allocated, %ld\n", free_huge_pages);
 120        return 0;
 121}
 122module_init(hugetlb_init);
 123
 124static int __init hugetlb_setup(char *s)
 125{
 126        if (sscanf(s, "%lu", &max_huge_pages) <= 0)
 127                max_huge_pages = 0;
 128        return 1;
 129}
 130__setup("hugepages=", hugetlb_setup);
 131
 132#ifdef CONFIG_SYSCTL
 133static void update_and_free_page(struct page *page)
 134{
 135        int i;
 136        nr_huge_pages--;
 137        nr_huge_pages_node[page_zone(page)->zone_pgdat->node_id]--;
 138        for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) {
 139                page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced |
 140                                1 << PG_dirty | 1 << PG_active | 1 << PG_reserved |
 141                                1 << PG_private | 1<< PG_writeback);
 142                set_page_count(&page[i], 0);
 143        }
 144        set_page_count(page, 1);
 145        __free_pages(page, HUGETLB_PAGE_ORDER);
 146}
 147
 148#ifdef CONFIG_HIGHMEM
 149static void try_to_free_low(unsigned long count)
 150{
 151        int i, nid;
 152        for (i = 0; i < MAX_NUMNODES; ++i) {
 153                struct page *page, *next;
 154                list_for_each_entry_safe(page, next, &hugepage_freelists[i], lru) {
 155                        if (PageHighMem(page))
 156                                continue;
 157                        list_del(&page->lru);
 158                        update_and_free_page(page);
 159                        nid = page_zone(page)->zone_pgdat->node_id;
 160                        free_huge_pages--;
 161                        free_huge_pages_node[nid]--;
 162                        if (count >= nr_huge_pages)
 163                                return;
 164                }
 165        }
 166}
 167#else
 168static inline void try_to_free_low(unsigned long count)
 169{
 170}
 171#endif
 172
 173static unsigned long set_max_huge_pages(unsigned long count)
 174{
 175        while (count > nr_huge_pages) {
 176                struct page *page = alloc_fresh_huge_page();
 177                if (!page)
 178                        return nr_huge_pages;
 179                spin_lock(&hugetlb_lock);
 180                enqueue_huge_page(page);
 181                spin_unlock(&hugetlb_lock);
 182        }
 183        if (count >= nr_huge_pages)
 184                return nr_huge_pages;
 185
 186        spin_lock(&hugetlb_lock);
 187        try_to_free_low(count);
 188        while (count < nr_huge_pages) {
 189                struct page *page = dequeue_huge_page();
 190                if (!page)
 191                        break;
 192                update_and_free_page(page);
 193        }
 194        spin_unlock(&hugetlb_lock);
 195        return nr_huge_pages;
 196}
 197
 198int hugetlb_sysctl_handler(struct ctl_table *table, int write,
 199                           struct file *file, void __user *buffer,
 200                           size_t *length, loff_t *ppos)
 201{
 202        proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
 203        max_huge_pages = set_max_huge_pages(max_huge_pages);
 204        return 0;
 205}
 206#endif /* CONFIG_SYSCTL */
 207
 208int hugetlb_report_meminfo(char *buf)
 209{
 210        return sprintf(buf,
 211                        "HugePages_Total: %5lu\n"
 212                        "HugePages_Free:  %5lu\n"
 213                        "Hugepagesize:    %5lu kB\n",
 214                        nr_huge_pages,
 215                        free_huge_pages,
 216                        HPAGE_SIZE/1024);
 217}
 218
 219int hugetlb_report_node_meminfo(int nid, char *buf)
 220{
 221        return sprintf(buf,
 222                "Node %d HugePages_Total: %5u\n"
 223                "Node %d HugePages_Free:  %5u\n",
 224                nid, nr_huge_pages_node[nid],
 225                nid, free_huge_pages_node[nid]);
 226}
 227
 228int is_hugepage_mem_enough(size_t size)
 229{
 230        return (size + ~HPAGE_MASK)/HPAGE_SIZE <= free_huge_pages;
 231}
 232
 233/* Return the number pages of memory we physically have, in PAGE_SIZE units. */
 234unsigned long hugetlb_total_pages(void)
 235{
 236        return nr_huge_pages * (HPAGE_SIZE / PAGE_SIZE);
 237}
 238EXPORT_SYMBOL(hugetlb_total_pages);
 239
 240/*
 241 * We cannot handle pagefaults against hugetlb pages at all.  They cause
 242 * handle_mm_fault() to try to instantiate regular-sized pages in the
 243 * hugegpage VMA.  do_page_fault() is supposed to trap this, so BUG is we get
 244 * this far.
 245 */
 246static struct page *hugetlb_nopage(struct vm_area_struct *vma,
 247                                unsigned long address, int *unused)
 248{
 249        BUG();
 250        return NULL;
 251}
 252
 253struct vm_operations_struct hugetlb_vm_ops = {
 254        .nopage = hugetlb_nopage,
 255};
 256
 257void zap_hugepage_range(struct vm_area_struct *vma,
 258                        unsigned long start, unsigned long length)
 259{
 260        struct mm_struct *mm = vma->vm_mm;
 261
 262        spin_lock(&mm->page_table_lock);
 263        unmap_hugepage_range(vma, start, start + length);
 264        spin_unlock(&mm->page_table_lock);
 265}
 266 
 267/*
 268 * On ia64 at least, it is possible to receive a hugetlb fault from a
 269 * stale zero entry left in the TLB from earlier hardware prefetching.
 270 * Low-level arch code should already have flushed the stale entry as
 271 * part of its fault handling, but we do need to accept this minor fault
 272 * and return successfully.  Whereas the "normal" case is that this is
 273 * an access to a hugetlb page which has been truncated off since mmap.
 274 */
 275int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 276                        unsigned long address, int write_access)
 277{
 278        int ret = VM_FAULT_SIGBUS;
 279#if CONFIG_IA64
 280        pte_t *pte;
 281
 282        spin_lock(&mm->page_table_lock);
 283        pte = huge_pte_offset(mm, address);
 284        if (pte && !pte_none(*pte))
 285                ret = VM_FAULT_MINOR;
 286        spin_unlock(&mm->page_table_lock);
 287#endif
 288        return ret;
 289}
 290