1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17#include <linux/config.h>
18#include <linux/stddef.h>
19#include <linux/mm.h>
20#include <linux/swap.h>
21#include <linux/interrupt.h>
22#include <linux/pagemap.h>
23#include <linux/bootmem.h>
24#include <linux/compiler.h>
25#include <linux/module.h>
26#include <linux/suspend.h>
27#include <linux/pagevec.h>
28#include <linux/blkdev.h>
29#include <linux/slab.h>
30#include <linux/notifier.h>
31#include <linux/topology.h>
32#include <linux/sysctl.h>
33#include <linux/cpu.h>
34
35#include <asm/tlbflush.h>
36
37DECLARE_BITMAP(node_online_map, MAX_NUMNODES);
38struct pglist_data *pgdat_list;
39unsigned long totalram_pages;
40unsigned long totalhigh_pages;
41long nr_swap_pages;
42int percpu_pagelist_fraction;
43int numnodes = 1;
44int sysctl_lower_zone_protection = 0;
45
46EXPORT_SYMBOL(totalram_pages);
47EXPORT_SYMBOL(nr_swap_pages);
48
49
50
51
52
53struct zone *zone_table[1 << (ZONES_SHIFT + NODES_SHIFT)];
54EXPORT_SYMBOL(zone_table);
55
56static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
57int min_free_kbytes = 1024;
58
59unsigned long __initdata nr_kernel_pages;
60unsigned long __initdata nr_all_pages;
61
62#ifdef CONFIG_HIGHMEM
63extern atomic_t bouncepages;
64#endif
65
66
67
68
69
70static int bad_range(struct zone *zone, struct page *page)
71{
72 if (page_to_pfn(page) >= zone->zone_start_pfn + zone->spanned_pages)
73 return 1;
74 if (page_to_pfn(page) < zone->zone_start_pfn)
75 return 1;
76 if (zone != page_zone(page))
77 return 1;
78 return 0;
79}
80
81static void bad_page(const char *function, struct page *page)
82{
83 printk(KERN_EMERG "Bad page state at %s (in process '%s', page %p)\n",
84 function, current->comm, page);
85 printk(KERN_EMERG "flags:0x%0*lx mapping:%p mapcount:%d count:%d\n",
86 (int)(2*sizeof(page_flags_t)), (unsigned long)page->flags,
87 page->mapping, page_mapcount(page), page_count(page));
88 printk(KERN_EMERG "Backtrace:\n");
89 dump_stack();
90 printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n");
91 page->flags &= ~(1 << PG_private |
92 1 << PG_locked |
93 1 << PG_lru |
94 1 << PG_active |
95 1 << PG_dirty |
96 1 << PG_swapcache |
97 1 << PG_writeback);
98 set_page_count(page, 0);
99 reset_page_mapcount(page);
100 page->mapping = NULL;
101 tainted |= TAINT_BAD_PAGE;
102}
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121static void prep_compound_page(struct page *page, unsigned long order)
122{
123 int i;
124 int nr_pages = 1 << order;
125
126 page[1].mapping = NULL;
127 page[1].index = order;
128 for (i = 0; i < nr_pages; i++) {
129 struct page *p = page + i;
130
131 SetPageCompound(p);
132 p->private = (unsigned long)page;
133 }
134}
135
136static void destroy_compound_page(struct page *page, unsigned long order)
137{
138 int i;
139 int nr_pages = 1 << order;
140
141 if (!PageCompound(page))
142 return;
143
144 if (page[1].index != order)
145 bad_page(__FUNCTION__, page);
146
147 for (i = 0; i < nr_pages; i++) {
148 struct page *p = page + i;
149
150 if (!PageCompound(p))
151 bad_page(__FUNCTION__, page);
152 if (p->private != (unsigned long)page)
153 bad_page(__FUNCTION__, page);
154 ClearPageCompound(p);
155 }
156}
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181static inline void __free_pages_bulk (struct page *page, struct page *base,
182 struct zone *zone, struct free_area *area, unsigned int order)
183{
184 unsigned long page_idx, index, mask;
185
186 if (order)
187 destroy_compound_page(page, order);
188 mask = (~0UL) << order;
189 page_idx = page - base;
190 if (page_idx & ~mask)
191 BUG();
192 index = page_idx >> (1 + order);
193
194 zone->free_pages += 1 << order;
195 while (order < MAX_ORDER-1) {
196 struct page *buddy1, *buddy2;
197
198 BUG_ON(area >= zone->free_area + MAX_ORDER);
199 if (!__test_and_change_bit(index, area->map))
200
201
202
203 break;
204
205
206 buddy1 = base + (page_idx ^ (1 << order));
207 buddy2 = base + page_idx;
208 BUG_ON(bad_range(zone, buddy1));
209 BUG_ON(bad_range(zone, buddy2));
210 list_del(&buddy1->lru);
211 mask <<= 1;
212 order++;
213 area++;
214 index >>= 1;
215 page_idx &= mask;
216 }
217 list_add(&(base + page_idx)->lru, &area->free_list);
218}
219
220static inline void free_pages_check(const char *function, struct page *page)
221{
222 if ( page_mapped(page) ||
223 page->mapping != NULL ||
224 page_count(page) != 0 ||
225 (page->flags & (
226 1 << PG_lru |
227 1 << PG_private |
228 1 << PG_locked |
229 1 << PG_active |
230 1 << PG_reclaim |
231 1 << PG_slab |
232 1 << PG_swapcache |
233 1 << PG_writeback )))
234 bad_page(function, page);
235 if (PageDirty(page))
236 ClearPageDirty(page);
237}
238
239
240
241
242
243
244
245
246
247
248
249
250static int
251free_pages_bulk(struct zone *zone, int count,
252 struct list_head *list, unsigned int order)
253{
254 unsigned long flags;
255 struct free_area *area;
256 struct page *base, *page = NULL;
257 int ret = 0;
258
259 base = zone->zone_mem_map;
260 area = zone->free_area + order;
261 spin_lock_irqsave(&zone->lock, flags);
262 zone->all_unreclaimable = 0;
263 zone->pages_scanned = 0;
264 while (!list_empty(list) && count--) {
265 page = list_entry(list->prev, struct page, lru);
266
267 list_del(&page->lru);
268 __free_pages_bulk(page, base, zone, area, order);
269 ret++;
270 }
271 spin_unlock_irqrestore(&zone->lock, flags);
272 return ret;
273}
274
275void __free_pages_ok(struct page *page, unsigned int order)
276{
277 LIST_HEAD(list);
278 int i;
279
280 if(arch_free_page(page, order))
281 return;
282
283 mod_page_state(pgfree, 1 << order);
284 for (i = 0 ; i < (1 << order) ; ++i)
285 free_pages_check(__FUNCTION__, page + i);
286 list_add(&page->lru, &list);
287 kernel_map_pages(page, 1<<order, 0);
288 free_pages_bulk(page_zone(page), 1, &list, order);
289}
290
/*
 * Toggle the buddy-pair bit that covers page offset @idx at order @ord
 * in the bitmap of free_area @fa (one bit per pair of 2^ord blocks).
 */
#define MARK_USED(idx, ord, fa) \
	__change_bit((idx) >> ((ord) + 1), (fa)->map)
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308static inline struct page *
309expand(struct zone *zone, struct page *page,
310 unsigned long index, int low, int high, struct free_area *area)
311{
312 unsigned long size = 1 << high;
313
314 while (high > low) {
315 area--;
316 high--;
317 size >>= 1;
318 BUG_ON(bad_range(zone, &page[size]));
319 list_add(&page[size].lru, &area->free_list);
320 MARK_USED(index + size, high, area);
321 }
322 return page;
323}
324
325static inline void set_page_refs(struct page *page, int order)
326{
327#ifdef CONFIG_MMU
328 set_page_count(page, 1);
329#else
330 int i;
331
332
333
334
335
336 for (i = 0; i < (1 << order); i++)
337 set_page_count(page+i, 1);
338#endif
339}
340
341
342
343
344static void prep_new_page(struct page *page, int order)
345{
346 if (page->mapping || page_mapped(page) ||
347 (page->flags & (
348 1 << PG_private |
349 1 << PG_locked |
350 1 << PG_lru |
351 1 << PG_active |
352 1 << PG_dirty |
353 1 << PG_reclaim |
354 1 << PG_swapcache |
355 1 << PG_writeback )))
356 bad_page(__FUNCTION__, page);
357
358 page->flags &= ~(1 << PG_uptodate | 1 << PG_error |
359 1 << PG_referenced | 1 << PG_arch_1 |
360 1 << PG_checked | 1 << PG_mappedtodisk);
361 page->private = 0;
362 set_page_refs(page, order);
363}
364
365
366
367
368
369static struct page *__rmqueue(struct zone *zone, unsigned int order)
370{
371 struct free_area * area;
372 unsigned int current_order;
373 struct page *page;
374 unsigned int index;
375
376 for (current_order = order; current_order < MAX_ORDER; ++current_order) {
377 area = zone->free_area + current_order;
378 if (list_empty(&area->free_list))
379 continue;
380
381 page = list_entry(area->free_list.next, struct page, lru);
382 list_del(&page->lru);
383 index = page - zone->zone_mem_map;
384 if (current_order != MAX_ORDER-1)
385 MARK_USED(index, current_order, area);
386 zone->free_pages -= 1UL << order;
387 return expand(zone, page, index, order, current_order, area);
388 }
389
390 return NULL;
391}
392
393
394
395
396
397
398static int rmqueue_bulk(struct zone *zone, unsigned int order,
399 unsigned long count, struct list_head *list)
400{
401 unsigned long flags;
402 int i;
403 int allocated = 0;
404 struct page *page;
405
406 spin_lock_irqsave(&zone->lock, flags);
407 for (i = 0; i < count; ++i) {
408 page = __rmqueue(zone, order);
409 if (page == NULL)
410 break;
411 allocated++;
412 list_add_tail(&page->lru, list);
413 }
414 spin_unlock_irqrestore(&zone->lock, flags);
415 return allocated;
416}
417
418#if defined(CONFIG_PM) || defined(CONFIG_HOTPLUG_CPU)
419static void __drain_pages(unsigned int cpu)
420{
421 struct zone *zone;
422 int i;
423
424 for_each_zone(zone) {
425 struct per_cpu_pageset *pset;
426
427 pset = &zone->pageset[cpu];
428 for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
429 struct per_cpu_pages *pcp;
430
431 pcp = &pset->pcp[i];
432 pcp->count -= free_pages_bulk(zone, pcp->count,
433 &pcp->list, 0);
434 }
435 }
436}
437#endif
438
439#ifdef CONFIG_PM
440int is_head_of_free_region(struct page *page)
441{
442 struct zone *zone = page_zone(page);
443 unsigned long flags;
444 int order;
445 struct list_head *curr;
446
447
448
449
450
451 spin_lock_irqsave(&zone->lock, flags);
452 for (order = MAX_ORDER - 1; order >= 0; --order)
453 list_for_each(curr, &zone->free_area[order].free_list)
454 if (page == list_entry(curr, struct page, lru)) {
455 spin_unlock_irqrestore(&zone->lock, flags);
456 return 1 << order;
457 }
458 spin_unlock_irqrestore(&zone->lock, flags);
459 return 0;
460}
461
462
463
464
/*
 * Flush the per-cpu page lists of the CPU we are running on back into
 * the buddy allocator.  Interrupts are disabled for the duration so the
 * lists cannot be touched underneath us on this CPU.
 */
void drain_local_pages(void)
{
	unsigned long irq_flags;

	local_irq_save(irq_flags);
	__drain_pages(smp_processor_id());
	local_irq_restore(irq_flags);
}
473#endif
474
/*
 * Update the NUMA allocation counters after a page was taken from @z
 * for a request whose preferred zone is the first entry of @zonelist.
 *
 * Counters live in the per-cpu pageset of the current CPU:
 *  - numa_hit on @z when @z is on the preferred node, otherwise
 *    numa_miss on @z and numa_foreign on the preferred zone;
 *  - local_node / other_node on @z depending on whether @z belongs to
 *    the node of the running CPU.
 *
 * Compiles to nothing without CONFIG_NUMA.
 */
static void zone_statistics(struct zonelist *zonelist, struct zone *z)
{
#ifdef CONFIG_NUMA
	struct zone *preferred = zonelist->zones[0];
	pg_data_t *used_node = z->zone_pgdat;
	pg_data_t *wanted_node = preferred->zone_pgdat;
	struct per_cpu_pageset *p;
	unsigned long flags;
	int cpu;

	local_irq_save(flags);
	cpu = smp_processor_id();
	p = &z->pageset[cpu];

	if (used_node == wanted_node) {
		p->numa_hit++;
	} else {
		p->numa_miss++;
		preferred->pageset[cpu].numa_foreign++;
	}

	if (used_node == NODE_DATA(numa_node_id()))
		p->local_node++;
	else
		p->other_node++;
	local_irq_restore(flags);
#endif
}
500
501
502
503
504static void FASTCALL(free_hot_cold_page(struct page *page, int cold));
505static void fastcall free_hot_cold_page(struct page *page, int cold)
506{
507 struct zone *zone = page_zone(page);
508 struct per_cpu_pages *pcp;
509 unsigned long flags;
510
511 if (arch_free_page(page, 0))
512 return;
513
514 kernel_map_pages(page, 1, 0);
515 inc_page_state(pgfree);
516 if (PageAnon(page))
517 page->mapping = NULL;
518 free_pages_check(__FUNCTION__, page);
519 pcp = &zone->pageset[get_cpu()].pcp[cold];
520 local_irq_save(flags);
521 if (pcp->count >= pcp->high)
522 pcp->count -= free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
523 list_add(&page->lru, &pcp->list);
524 pcp->count++;
525 local_irq_restore(flags);
526 put_cpu();
527}
528
529void fastcall free_hot_page(struct page *page)
530{
531 free_hot_cold_page(page, 0);
532}
533
534void fastcall free_cold_page(struct page *page)
535{
536 free_hot_cold_page(page, 1);
537}
538
539
540
541
542
543
544
545static struct page *
546buffered_rmqueue(struct zone *zone, int order, int gfp_flags)
547{
548 unsigned long flags;
549 struct page *page = NULL;
550 int cold = !!(gfp_flags & __GFP_COLD);
551
552 if (order == 0) {
553 struct per_cpu_pages *pcp;
554
555 pcp = &zone->pageset[get_cpu()].pcp[cold];
556 local_irq_save(flags);
557 if (pcp->count <= pcp->low)
558 pcp->count += rmqueue_bulk(zone, 0,
559 pcp->batch, &pcp->list);
560 if (pcp->count) {
561 page = list_entry(pcp->list.next, struct page, lru);
562 list_del(&page->lru);
563 pcp->count--;
564 }
565 local_irq_restore(flags);
566 put_cpu();
567 }
568
569 if (page == NULL) {
570 spin_lock_irqsave(&zone->lock, flags);
571 page = __rmqueue(zone, order);
572 spin_unlock_irqrestore(&zone->lock, flags);
573 }
574
575 if (page != NULL) {
576 BUG_ON(bad_range(zone, page));
577 mod_page_state_zone(zone, pgalloc, 1 << order);
578 prep_new_page(page, order);
579 if (order && (gfp_flags & __GFP_COMP))
580 prep_compound_page(page, order);
581 }
582 return page;
583}
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601struct page * fastcall
602__alloc_pages(unsigned int gfp_mask, unsigned int order,
603 struct zonelist *zonelist)
604{
605 const int wait = gfp_mask & __GFP_WAIT;
606 unsigned long min;
607 struct zone **zones, *z;
608 struct page *page;
609 struct reclaim_state reclaim_state;
610 struct task_struct *p = current;
611 int i;
612 int alloc_type;
613 int do_retry;
614 int can_try_harder;
615
616 might_sleep_if(wait);
617
618
619
620
621
622
623 can_try_harder = (unlikely(rt_task(p)) && !in_interrupt()) || !wait;
624
625 zones = zonelist->zones;
626
627 if (unlikely(zones[0] == NULL)) {
628
629 return NULL;
630 }
631
632 alloc_type = zone_idx(zones[0]);
633
634
635 for (i = 0; (z = zones[i]) != NULL; i++) {
636 min = z->pages_low + (1<<order) + z->protection[alloc_type];
637
638 if (z->free_pages < min)
639 continue;
640
641 page = buffered_rmqueue(z, order, gfp_mask);
642 if (page)
643 goto got_pg;
644 }
645
646 for (i = 0; (z = zones[i]) != NULL; i++)
647 wakeup_kswapd(z);
648
649
650
651
652
653 for (i = 0; (z = zones[i]) != NULL; i++) {
654 min = z->pages_min;
655 if (gfp_mask & __GFP_HIGH)
656 min /= 2;
657 if (can_try_harder)
658 min -= min / 4;
659 min += (1<<order) + z->protection[alloc_type];
660
661 if (z->free_pages < min)
662 continue;
663
664 page = buffered_rmqueue(z, order, gfp_mask);
665 if (page)
666 goto got_pg;
667 }
668
669
670 if ((p->flags & (PF_MEMALLOC | PF_MEMDIE)) && !in_interrupt()) {
671
672 for (i = 0; (z = zones[i]) != NULL; i++) {
673 page = buffered_rmqueue(z, order, gfp_mask);
674 if (page)
675 goto got_pg;
676 }
677 goto nopage;
678 }
679
680
681 if (!wait)
682 goto nopage;
683
684rebalance:
685
686 if (p->flags & PF_MEMDIE)
687 goto nopage;
688 p->flags |= PF_MEMALLOC;
689 reclaim_state.reclaimed_slab = 0;
690 p->reclaim_state = &reclaim_state;
691 try_to_free_pages(zones, gfp_mask, order, can_try_harder, alloc_type);
692
693 p->reclaim_state = NULL;
694 p->flags &= ~PF_MEMALLOC;
695
696
697 for (i = 0; (z = zones[i]) != NULL; i++) {
698 min = zone_min(z,gfp_mask,can_try_harder,alloc_type,order);
699
700 if (z->free_pages < min)
701 continue;
702 page = buffered_rmqueue(z, order, gfp_mask);
703 if (page)
704 goto got_pg;
705 }
706
707
708
709
710
711
712
713
714 do_retry = 0;
715 if (!(gfp_mask & __GFP_NORETRY)) {
716 if ((order <= 3) || (gfp_mask & __GFP_REPEAT))
717 do_retry = 1;
718 if (gfp_mask & __GFP_NOFAIL)
719 do_retry = 1;
720 }
721 if (do_retry) {
722 blk_congestion_wait(WRITE, HZ/50);
723 goto rebalance;
724 }
725
726nopage:
727 if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) {
728 printk(KERN_WARNING "%s: page allocation failure."
729 " order:%d, mode:0x%x\n",
730 p->comm, order, gfp_mask);
731 dump_stack();
732 show_mem();
733 }
734 return NULL;
735got_pg:
736 zone_statistics(zonelist, z);
737 kernel_map_pages(page, 1 << order, 1);
738 return page;
739}
740
741EXPORT_SYMBOL(__alloc_pages);
742
743
744
745
746fastcall unsigned long __get_free_pages(unsigned int gfp_mask, unsigned int order)
747{
748 struct page * page;
749 page = alloc_pages(gfp_mask, order);
750 if (!page)
751 return 0;
752 return (unsigned long) page_address(page);
753}
754
755EXPORT_SYMBOL(__get_free_pages);
756
757fastcall unsigned long get_zeroed_page(unsigned int gfp_mask)
758{
759 struct page * page;
760
761
762
763
764
765 BUG_ON(gfp_mask & __GFP_HIGHMEM);
766
767 page = alloc_pages(gfp_mask, 0);
768 if (page) {
769 void *address = page_address(page);
770 clear_page(address);
771 return (unsigned long) address;
772 }
773 return 0;
774}
775
776EXPORT_SYMBOL(get_zeroed_page);
777
778void __pagevec_free(struct pagevec *pvec)
779{
780 int i = pagevec_count(pvec);
781
782 while (--i >= 0)
783 free_hot_cold_page(pvec->pages[i], pvec->cold);
784}
785
786fastcall void __free_pages(struct page *page, unsigned int order)
787{
788 if (!PageReserved(page) && put_page_testzero(page)) {
789 if (order == 0)
790 free_hot_page(page);
791 else
792 __free_pages_ok(page, order);
793 }
794}
795
796EXPORT_SYMBOL(__free_pages);
797
798fastcall void free_pages(unsigned long addr, unsigned int order)
799{
800 if (addr != 0) {
801 BUG_ON(!virt_addr_valid((void *)addr));
802 __free_pages(virt_to_page((void *)addr), order);
803 }
804}
805
806EXPORT_SYMBOL(free_pages);
807
808
809
810
811unsigned int nr_free_pages(void)
812{
813 unsigned int sum = 0;
814 struct zone *zone;
815
816 for_each_zone(zone)
817 sum += zone->free_pages;
818
819 return sum;
820}
821
822EXPORT_SYMBOL(nr_free_pages);
823
824#ifdef CONFIG_NUMA
825unsigned int nr_free_pages_pgdat(pg_data_t *pgdat)
826{
827 unsigned int i, sum = 0;
828
829 for (i = 0; i < MAX_NR_ZONES; i++)
830 sum += pgdat->node_zones[i].free_pages;
831
832 return sum;
833}
834#endif
835
836static unsigned int nr_free_zone_pages(int offset)
837{
838
839 pg_data_t *pgdat = NODE_DATA(numa_node_id());
840 unsigned int sum = 0;
841
842 struct zonelist *zonelist = pgdat->node_zonelists + offset;
843 struct zone **zonep = zonelist->zones;
844 struct zone *zone;
845
846 for (zone = *zonep++; zone; zone = *zonep++) {
847 unsigned long size = zone->present_pages;
848 unsigned long high = zone->pages_high;
849 if (size > high)
850 sum += size - high;
851 }
852
853 return sum;
854}
855
856
857
858
859unsigned int nr_free_buffer_pages(void)
860{
861 return nr_free_zone_pages(GFP_USER & GFP_ZONEMASK);
862}
863
864
865
866
867unsigned int nr_free_pagecache_pages(void)
868{
869 return nr_free_zone_pages(GFP_HIGHUSER & GFP_ZONEMASK);
870}
871
872#ifdef CONFIG_HIGHMEM
873unsigned int nr_free_highpages (void)
874{
875 pg_data_t *pgdat;
876 unsigned int pages = 0;
877
878 for_each_pgdat(pgdat)
879 pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages;
880
881 return pages;
882}
883#endif
884
/*
 * Print a "Node <id> " prefix for @zone in front of a report line.
 * Without CONFIG_NUMA this expands to an empty statement.
 */
#ifdef CONFIG_NUMA
static void show_node(struct zone *zone)
{
	int nid = zone->zone_pgdat->node_id;

	printk("Node %d ", nid);
}
#else
#define show_node(zone)	do { } while (0)
#endif
893
894
895
896
897
898
899DEFINE_PER_CPU(struct page_state, page_states) = {0};
900EXPORT_PER_CPU_SYMBOL(page_states);
901
902atomic_t nr_pagecache = ATOMIC_INIT(0);
903EXPORT_SYMBOL(nr_pagecache);
904#ifdef CONFIG_SMP
905DEFINE_PER_CPU(long, nr_pagecache_local) = 0;
906#endif
907
908void __get_page_state(struct page_state *ret, int nr)
909{
910 int cpu = 0;
911
912 memset(ret, 0, sizeof(*ret));
913 while (cpu < NR_CPUS) {
914 unsigned long *in, *out, off;
915
916 if (!cpu_possible(cpu)) {
917 cpu++;
918 continue;
919 }
920
921 in = (unsigned long *)&per_cpu(page_states, cpu);
922 cpu++;
923 if (cpu < NR_CPUS && cpu_possible(cpu))
924 prefetch(&per_cpu(page_states, cpu));
925 out = (unsigned long *)ret;
926 for (off = 0; off < nr; off++)
927 *out++ += *in++;
928 }
929}
930
931void get_page_state(struct page_state *ret)
932{
933 int nr;
934
935 nr = offsetof(struct page_state, GET_PAGE_STATE_LAST);
936 nr /= sizeof(unsigned long);
937
938 __get_page_state(ret, nr + 1);
939}
940
941void get_full_page_state(struct page_state *ret)
942{
943 __get_page_state(ret, sizeof(*ret) / sizeof(unsigned long));
944}
945
946unsigned long __read_page_state(unsigned offset)
947{
948 unsigned long ret = 0;
949 int cpu;
950
951 for (cpu = 0; cpu < NR_CPUS; cpu++) {
952 unsigned long in;
953
954 if (!cpu_possible(cpu))
955 continue;
956
957 in = (unsigned long)&per_cpu(page_states, cpu) + offset;
958 ret += *((unsigned long *)in);
959 }
960 return ret;
961}
962
963void __get_zone_counts(unsigned long *active, unsigned long *inactive,
964 unsigned long *free, struct pglist_data *pgdat)
965{
966 struct zone *zones = pgdat->node_zones;
967 int i;
968
969 *active = 0;
970 *inactive = 0;
971 *free = 0;
972 for (i = 0; i < MAX_NR_ZONES; i++) {
973 *active += zones[i].nr_active;
974 *inactive += zones[i].nr_inactive;
975 *free += zones[i].free_pages;
976 }
977}
978
979void get_zone_counts(unsigned long *active,
980 unsigned long *inactive, unsigned long *free)
981{
982 struct pglist_data *pgdat;
983
984 *active = 0;
985 *inactive = 0;
986 *free = 0;
987 for_each_pgdat(pgdat) {
988 unsigned long l, m, n;
989 __get_zone_counts(&l, &m, &n, pgdat);
990 *active += l;
991 *inactive += m;
992 *free += n;
993 }
994}
995
996void si_meminfo(struct sysinfo *val)
997{
998 val->totalram = totalram_pages;
999 val->sharedram = 0;
1000 val->freeram = nr_free_pages();
1001 val->bufferram = nr_blockdev_pages();
1002#ifdef CONFIG_HIGHMEM
1003 val->totalhigh = totalhigh_pages;
1004 val->freehigh = nr_free_highpages();
1005#else
1006 val->totalhigh = 0;
1007 val->freehigh = 0;
1008#endif
1009 val->mem_unit = PAGE_SIZE;
1010}
1011
1012EXPORT_SYMBOL(si_meminfo);
1013
1014#ifdef CONFIG_NUMA
1015void si_meminfo_node(struct sysinfo *val, int nid)
1016{
1017 pg_data_t *pgdat = NODE_DATA(nid);
1018
1019 val->totalram = pgdat->node_present_pages;
1020 val->freeram = nr_free_pages_pgdat(pgdat);
1021 val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages;
1022 val->freehigh = pgdat->node_zones[ZONE_HIGHMEM].free_pages;
1023 val->mem_unit = PAGE_SIZE;
1024}
1025#endif
1026
/* Convert a page count to kilobytes (a page is 2^PAGE_SHIFT bytes). */
#define K(pages)	((pages) << (PAGE_SHIFT - 10))
1028
1029
1030
1031
1032
1033
1034void show_free_areas(void)
1035{
1036 struct page_state ps;
1037 int cpu, temperature;
1038 unsigned long active;
1039 unsigned long inactive;
1040 unsigned long free;
1041 struct zone *zone;
1042
1043 for_each_zone(zone) {
1044 show_node(zone);
1045 printk("%s per-cpu:", zone->name);
1046
1047 if (!zone->present_pages) {
1048 printk(" empty\n");
1049 continue;
1050 } else
1051 printk("\n");
1052
1053 for (cpu = 0; cpu < NR_CPUS; ++cpu) {
1054 struct per_cpu_pageset *pageset;
1055
1056 if (!cpu_possible(cpu))
1057 continue;
1058
1059 pageset = zone->pageset + cpu;
1060
1061 for (temperature = 0; temperature < 2; temperature++)
1062 printk("cpu %d %s: low %d, high %d, batch %d\n",
1063 cpu,
1064 temperature ? "cold" : "hot",
1065 pageset->pcp[temperature].low,
1066 pageset->pcp[temperature].high,
1067 pageset->pcp[temperature].batch);
1068 }
1069 }
1070
1071 get_page_state(&ps);
1072 get_zone_counts(&active, &inactive, &free);
1073
1074 printk("\nFree pages: %11ukB (%ukB HighMem)\n",
1075 K(nr_free_pages()),
1076 K(nr_free_highpages()));
1077
1078 printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu "
1079 "unstable:%lu free:%u slab:%lu mapped:%lu pagetables:%lu\n",
1080 active,
1081 inactive,
1082 ps.nr_dirty,
1083 ps.nr_writeback,
1084 ps.nr_unstable,
1085 nr_free_pages(),
1086 ps.nr_slab,
1087 ps.nr_mapped,
1088 ps.nr_page_table_pages);
1089
1090 for_each_zone(zone) {
1091 int i;
1092
1093 show_node(zone);
1094 printk("%s"
1095 " free:%lukB"
1096 " min:%lukB"
1097 " low:%lukB"
1098 " high:%lukB"
1099 " active:%lukB"
1100 " inactive:%lukB"
1101 " present:%lukB"
1102 " pages_scanned:%lu"
1103 " all_unreclaimable? %s"
1104 "\n",
1105 zone->name,
1106 K(zone->free_pages),
1107 K(zone->pages_min),
1108 K(zone->pages_low),
1109 K(zone->pages_high),
1110 K(zone->nr_active),
1111 K(zone->nr_inactive),
1112 K(zone->present_pages),
1113 zone->pages_scanned,
1114 (zone->all_unreclaimable ? "yes" : "no")
1115 );
1116 printk("protections[]:");
1117 for (i = 0; i < MAX_NR_ZONES; i++)
1118 printk(" %lu", zone->protection[i]);
1119 printk("\n");
1120 }
1121
1122 for_each_zone(zone) {
1123 struct list_head *elem;
1124 unsigned long nr, flags, order, total = 0;
1125
1126 show_node(zone);
1127 printk("%s: ", zone->name);
1128 if (!zone->present_pages) {
1129 printk("empty\n");
1130 continue;
1131 }
1132
1133 for (order = 0; order < MAX_ORDER; order++) {
1134 nr = 0;
1135 spin_lock_irqsave(&zone->lock, flags);
1136 list_for_each(elem, &zone->free_area[order].free_list)
1137 ++nr;
1138 spin_unlock_irqrestore(&zone->lock, flags);
1139 total += nr << order;
1140 printk("%lu*%lukB ", nr, K(1UL) << order);
1141 }
1142 printk("= %lukB\n", K(total));
1143 }
1144
1145 printk("%d pagecache pages\n", get_page_cache_size());
1146
1147 show_swap_cache_info();
1148
1149#ifdef CONFIG_HIGHMEM
1150 printk("%d bounce buffer pages\n", atomic_read(&bouncepages));
1151#endif
1152}
1153
1154
1155
1156
1157static int __init build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist, int j, int k)
1158{
1159 switch (k) {
1160 struct zone *zone;
1161 default:
1162 BUG();
1163 case ZONE_HIGHMEM:
1164 zone = pgdat->node_zones + ZONE_HIGHMEM;
1165 if (zone->present_pages) {
1166#ifndef CONFIG_HIGHMEM
1167 BUG();
1168#endif
1169 zonelist->zones[j++] = zone;
1170 }
1171 case ZONE_NORMAL:
1172 zone = pgdat->node_zones + ZONE_NORMAL;
1173 if (zone->present_pages)
1174 zonelist->zones[j++] = zone;
1175#if defined(CONFIG_HIGHMEM64G) || (defined(CONFIG_X86_64) && !defined(CONFIG_XEN))
1176 break;
1177#endif
1178 case ZONE_DMA:
1179 zone = pgdat->node_zones + ZONE_DMA;
1180 if (zone->present_pages)
1181 zonelist->zones[j++] = zone;
1182 }
1183
1184 return j;
1185}
1186
1187#ifdef CONFIG_NUMA
1188#define MAX_NODE_LOAD (numnodes)
1189static int __initdata node_load[MAX_NUMNODES];
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205static int __init find_next_best_node(int node, void *used_node_mask)
1206{
1207 int i, n, val;
1208 int min_val = INT_MAX;
1209 int best_node = -1;
1210
1211 for (i = 0; i < numnodes; i++) {
1212 cpumask_t tmp;
1213
1214
1215 n = (node+i)%numnodes;
1216
1217
1218 if (test_bit(n, used_node_mask))
1219 continue;
1220
1221
1222 val = node_distance(node, n);
1223
1224
1225 tmp = node_to_cpumask(n);
1226 if (!cpus_empty(tmp))
1227 val += PENALTY_FOR_NODE_WITH_CPUS;
1228
1229
1230 val *= (MAX_NODE_LOAD*MAX_NUMNODES);
1231 val += node_load[n];
1232
1233 if (val < min_val) {
1234 min_val = val;
1235 best_node = n;
1236 }
1237 }
1238
1239 if (best_node >= 0)
1240 set_bit(best_node, used_node_mask);
1241
1242 return best_node;
1243}
1244
1245static void __init build_zonelists(pg_data_t *pgdat)
1246{
1247 int i, j, k, node, local_node;
1248 int prev_node, load;
1249 struct zonelist *zonelist;
1250 DECLARE_BITMAP(used_mask, MAX_NUMNODES);
1251
1252
1253 for (i = 0; i < GFP_ZONETYPES; i++) {
1254 zonelist = pgdat->node_zonelists + i;
1255 memset(zonelist, 0, sizeof(*zonelist));
1256 zonelist->zones[0] = NULL;
1257 }
1258
1259
1260 local_node = pgdat->node_id;
1261 load = numnodes;
1262 prev_node = local_node;
1263 bitmap_zero(used_mask, MAX_NUMNODES);
1264 while ((node = find_next_best_node(local_node, used_mask)) >= 0) {
1265
1266
1267
1268
1269
1270 if (node_distance(local_node, node) !=
1271 node_distance(local_node, prev_node))
1272 node_load[node] += load;
1273 prev_node = node;
1274 load--;
1275 for (i = 0; i < GFP_ZONETYPES; i++) {
1276 zonelist = pgdat->node_zonelists + i;
1277 for (j = 0; zonelist->zones[j] != NULL; j++);
1278
1279 k = ZONE_NORMAL;
1280 if (i & __GFP_HIGHMEM)
1281 k = ZONE_HIGHMEM;
1282 if (i & __GFP_DMA)
1283 k = ZONE_DMA;
1284
1285 j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
1286 zonelist->zones[j] = NULL;
1287 }
1288 }
1289}
1290
1291#else
1292
1293static void __init build_zonelists(pg_data_t *pgdat)
1294{
1295 int i, j, k, node, local_node;
1296
1297 local_node = pgdat->node_id;
1298 for (i = 0; i < GFP_ZONETYPES; i++) {
1299 struct zonelist *zonelist;
1300
1301 zonelist = pgdat->node_zonelists + i;
1302 memset(zonelist, 0, sizeof(*zonelist));
1303
1304 j = 0;
1305 k = ZONE_NORMAL;
1306 if (i & __GFP_HIGHMEM)
1307 k = ZONE_HIGHMEM;
1308 if (i & __GFP_DMA)
1309 k = ZONE_DMA;
1310
1311 j = build_zonelists_node(pgdat, zonelist, j, k);
1312
1313
1314
1315
1316
1317
1318
1319
1320 for (node = local_node + 1; node < numnodes; node++)
1321 j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
1322 for (node = 0; node < local_node; node++)
1323 j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
1324
1325 zonelist->zones[j] = NULL;
1326 }
1327}
1328
1329#endif
1330
1331void __init build_all_zonelists(void)
1332{
1333 int i;
1334
1335 for(i = 0 ; i < numnodes ; i++)
1336 build_zonelists(NODE_DATA(i));
1337 printk("Built %i zonelists\n", numnodes);
1338}
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
/* Target number of pages sharing one wait-queue head. */
#define PAGES_PER_WAITQUEUE	256

/*
 * Number of wait-queue heads for a zone spanning @pages pages: the
 * smallest power of two that is >= pages / PAGES_PER_WAITQUEUE, kept
 * within the range [4, 4096].
 */
static inline unsigned long wait_table_size(unsigned long pages)
{
	unsigned long wanted = pages / PAGES_PER_WAITQUEUE;
	unsigned long size;

	/* Round up to a power of two. */
	for (size = 1; size < wanted; size <<= 1)
		;

	/* Cap the table first, then enforce the minimum. */
	if (size > 4096UL)
		size = 4096UL;
	if (size < 4UL)
		size = 4UL;

	return size;
}
1371
1372
1373
1374
1375
1376
/*
 * Position of the lowest set bit of @size (first zero bit of ~size).
 * For the power-of-two sizes produced by wait_table_size() this is
 * log2(size).
 */
static inline unsigned long wait_table_bits(unsigned long size)
{
	unsigned long inverted = ~size;

	return ffz(inverted);
}
1381
/* Round @x up to the next multiple of sizeof(long). */
#define LONG_ALIGN(x) \
	(((x) + (sizeof(long)) - 1) & ~((sizeof(long)) - 1))
1383
1384static void __init calculate_zone_totalpages(struct pglist_data *pgdat,
1385 unsigned long *zones_size, unsigned long *zholes_size)
1386{
1387 unsigned long realtotalpages, totalpages = 0;
1388 int i;
1389
1390 for (i = 0; i < MAX_NR_ZONES; i++)
1391 totalpages += zones_size[i];
1392 pgdat->node_spanned_pages = totalpages;
1393
1394 realtotalpages = totalpages;
1395 if (zholes_size)
1396 for (i = 0; i < MAX_NR_ZONES; i++)
1397 realtotalpages -= zholes_size[i];
1398 pgdat->node_present_pages = realtotalpages;
1399 printk(KERN_DEBUG "On node %d totalpages: %lu\n", pgdat->node_id, realtotalpages);
1400}
1401
1402
1403
1404
1405
1406
1407
1408void __init memmap_init_zone(unsigned long size, int nid, unsigned long zone,
1409 unsigned long start_pfn)
1410{
1411 struct page *start = pfn_to_page(start_pfn);
1412 struct page *page;
1413
1414 for (page = start; page < (start + size); page++) {
1415 set_page_zone(page, NODEZONE(nid, zone));
1416 set_page_count(page, 0);
1417 reset_page_mapcount(page);
1418 SetPageReserved(page);
1419 INIT_LIST_HEAD(&page->lru);
1420#ifdef WANT_PAGE_VIRTUAL
1421
1422 if (!is_highmem_idx(zone))
1423 set_page_address(page, __va(start_pfn << PAGE_SHIFT));
1424#endif
1425 start_pfn++;
1426 }
1427}
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
/*
 * Size in bytes of the buddy bitmap for blocks of order @order in a
 * zone of @nr_pages pages.
 *
 * One bit covers a pair of order-@order blocks, i.e. 2^(order+1) pages,
 * so one byte covers 2^(order+4) pages.  The byte count is rounded up
 * to a multiple of sizeof(long).
 */
unsigned long pages_to_bitmap_size(unsigned long order, unsigned long nr_pages)
{
	/* Index of the byte that holds the bit for the last page. */
	unsigned long last_byte = (nr_pages-1) >> (order+4);

	return LONG_ALIGN(last_byte+1);
}
1456
1457void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone, unsigned long size)
1458{
1459 int order;
1460 for (order = 0; ; order++) {
1461 unsigned long bitmap_size;
1462
1463 INIT_LIST_HEAD(&zone->free_area[order].free_list);
1464 if (order == MAX_ORDER-1) {
1465 zone->free_area[order].map = NULL;
1466 break;
1467 }
1468
1469 bitmap_size = pages_to_bitmap_size(order, size);
1470 zone->free_area[order].map =
1471 (unsigned long *) alloc_bootmem_node(pgdat, bitmap_size);
1472 }
1473}
1474
/*
 * Default memmap_init(): forwards to memmap_init_zone() unless the
 * architecture defines __HAVE_ARCH_MEMMAP_INIT and supplies its own.
 */
#ifndef __HAVE_ARCH_MEMMAP_INIT
#define memmap_init(nr_pages, node, zone_nr, first_pfn) \
	memmap_init_zone((nr_pages), (node), (zone_nr), (first_pfn))
#endif
1479
1480
1481
1482
1483
1484
1485static void setup_pagelist_highmark(struct per_cpu_pageset *p,
1486 unsigned long high)
1487{
1488 struct per_cpu_pages *pcp;
1489
1490 pcp = &p->pcp[0];
1491 pcp->high = high;
1492 pcp->batch = max(1UL, high/4);
1493 if ((high/4) > (PAGE_SHIFT * 8))
1494 pcp->batch = PAGE_SHIFT * 8;
1495}
1496
1497
1498
1499
1500
1501
1502
1503
1504static void __init free_area_init_core(struct pglist_data *pgdat,
1505 unsigned long *zones_size, unsigned long *zholes_size)
1506{
1507 unsigned long i, j;
1508 const unsigned long zone_required_alignment = 1UL << (MAX_ORDER-1);
1509 int cpu, nid = pgdat->node_id;
1510 unsigned long zone_start_pfn = pgdat->node_start_pfn;
1511
1512 pgdat->nr_zones = 0;
1513 init_waitqueue_head(&pgdat->kswapd_wait);
1514
1515 for (j = 0; j < MAX_NR_ZONES; j++) {
1516 struct zone *zone = pgdat->node_zones + j;
1517 unsigned long size, realsize;
1518 unsigned long batch;
1519
1520 zone_table[NODEZONE(nid, j)] = zone;
1521 realsize = size = zones_size[j];
1522 if (zholes_size)
1523 realsize -= zholes_size[j];
1524
1525 if (j == ZONE_DMA || j == ZONE_NORMAL)
1526 nr_kernel_pages += realsize;
1527 nr_all_pages += realsize;
1528
1529 zone->spanned_pages = size;
1530 zone->present_pages = realsize;
1531 zone->name = zone_names[j];
1532 spin_lock_init(&zone->lock);
1533 spin_lock_init(&zone->lru_lock);
1534 zone->zone_pgdat = pgdat;
1535 zone->free_pages = 0;
1536
1537 zone->temp_priority = zone->prev_priority = DEF_PRIORITY;
1538
1539
1540
1541
1542
1543
1544
1545
1546 batch = zone->present_pages / 1024;
1547 if (batch * PAGE_SIZE > 256 * 1024)
1548 batch = (256 * 1024) / PAGE_SIZE;
1549 batch /= 4;
1550 if (batch < 1)
1551 batch = 1;
1552
1553 for (cpu = 0; cpu < NR_CPUS; cpu++) {
1554 struct per_cpu_pages *pcp;
1555
1556 pcp = &zone->pageset[cpu].pcp[0];
1557 pcp->count = 0;
1558 pcp->low = 2 * batch;
1559 pcp->high = 6 * batch;
1560 pcp->batch = 1 * batch;
1561 INIT_LIST_HEAD(&pcp->list);
1562
1563 pcp = &zone->pageset[cpu].pcp[1];
1564 pcp->count = 0;
1565 pcp->low = 0;
1566 pcp->high = 2 * batch;
1567 pcp->batch = 1 * batch;
1568 INIT_LIST_HEAD(&pcp->list);
1569 }
1570 printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%lu\n",
1571 zone_names[j], realsize, batch);
1572 INIT_LIST_HEAD(&zone->active_list);
1573 INIT_LIST_HEAD(&zone->inactive_list);
1574 zone->nr_scan_active = 0;
1575 zone->nr_scan_inactive = 0;
1576 zone->nr_active = 0;
1577 zone->nr_inactive = 0;
1578 if (!size)
1579 continue;
1580
1581
1582
1583
1584
1585 zone->wait_table_size = wait_table_size(size);
1586 zone->wait_table_bits =
1587 wait_table_bits(zone->wait_table_size);
1588 zone->wait_table = (wait_queue_head_t *)
1589 alloc_bootmem_node(pgdat, zone->wait_table_size
1590 * sizeof(wait_queue_head_t));
1591
1592 for(i = 0; i < zone->wait_table_size; ++i)
1593 init_waitqueue_head(zone->wait_table + i);
1594
1595 pgdat->nr_zones = j+1;
1596
1597 zone->zone_mem_map = pfn_to_page(zone_start_pfn);
1598 zone->zone_start_pfn = zone_start_pfn;
1599
1600 if ((zone_start_pfn) & (zone_required_alignment-1))
1601 printk("BUG: wrong zone alignment, it will crash\n");
1602
1603 memmap_init(size, nid, j, zone_start_pfn);
1604
1605 zone_start_pfn += size;
1606
1607 zone_init_free_lists(pgdat, zone, zone->spanned_pages);
1608 }
1609}
1610
1611__attribute__((weak))
1612void *alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size)
1613{
1614 return NULL;
1615}
1616
1617void __init node_alloc_mem_map(struct pglist_data *pgdat)
1618{
1619 unsigned long size;
1620
1621 size = (pgdat->node_spanned_pages + 1) * sizeof(struct page);
1622 if (!pgdat->node_mem_map)
1623 pgdat->node_mem_map = alloc_bootmem_high_node(pgdat, size);
1624 if (!pgdat->node_mem_map)
1625 pgdat->node_mem_map = alloc_bootmem_node(pgdat, size);
1626#ifndef CONFIG_DISCONTIGMEM
1627 mem_map = contig_page_data.node_mem_map;
1628#endif
1629}
1630
1631void __init free_area_init_node(int nid, struct pglist_data *pgdat,
1632 unsigned long *zones_size, unsigned long node_start_pfn,
1633 unsigned long *zholes_size)
1634{
1635 pgdat->node_id = nid;
1636 pgdat->node_start_pfn = node_start_pfn;
1637 calculate_zone_totalpages(pgdat, zones_size, zholes_size);
1638
1639 if (!pfn_to_page(node_start_pfn))
1640 node_alloc_mem_map(pgdat);
1641
1642 free_area_init_core(pgdat, zones_size, zholes_size);
1643}
1644
1645#ifndef CONFIG_DISCONTIGMEM
1646static bootmem_data_t contig_bootmem_data;
1647struct pglist_data contig_page_data = { .bdata = &contig_bootmem_data };
1648
1649EXPORT_SYMBOL(contig_page_data);
1650
1651void __init free_area_init(unsigned long *zones_size)
1652{
1653 free_area_init_node(0, &contig_page_data, zones_size,
1654 __pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL);
1655}
1656#endif
1657
1658#ifdef CONFIG_PROC_FS
1659
1660#include <linux/seq_file.h>
1661
1662static void *frag_start(struct seq_file *m, loff_t *pos)
1663{
1664 pg_data_t *pgdat;
1665 loff_t node = *pos;
1666
1667 for (pgdat = pgdat_list; pgdat && node; pgdat = pgdat->pgdat_next)
1668 --node;
1669
1670 return pgdat;
1671}
1672
1673static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
1674{
1675 pg_data_t *pgdat = (pg_data_t *)arg;
1676
1677 (*pos)++;
1678 return pgdat->pgdat_next;
1679}
1680
/* seq_file ->stop hook for the fragmentation iterator. */
static void frag_stop(struct seq_file *m, void *arg)
{
	/* frag_start() acquires nothing, so there is nothing to release. */
}
1684
1685
1686
1687
1688
1689static int frag_show(struct seq_file *m, void *arg)
1690{
1691 pg_data_t *pgdat = (pg_data_t *)arg;
1692 struct zone *zone;
1693 struct zone *node_zones = pgdat->node_zones;
1694 unsigned long flags;
1695 int order;
1696
1697 for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
1698 if (!zone->present_pages)
1699 continue;
1700
1701 spin_lock_irqsave(&zone->lock, flags);
1702 seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
1703 for (order = 0; order < MAX_ORDER; ++order) {
1704 unsigned long nr_bufs = 0;
1705 struct list_head *elem;
1706
1707 list_for_each(elem, &(zone->free_area[order].free_list))
1708 ++nr_bufs;
1709 seq_printf(m, "%6lu ", nr_bufs);
1710 }
1711 spin_unlock_irqrestore(&zone->lock, flags);
1712 seq_putc(m, '\n');
1713 }
1714 return 0;
1715}
1716
1717struct seq_operations fragmentation_op = {
1718 .start = frag_start,
1719 .next = frag_next,
1720 .stop = frag_stop,
1721 .show = frag_show,
1722};
1723
/*
 * Names printed by the vmstat seq_file, one per counter.
 *
 * vmstat_start()/vmstat_show() treat the page_state snapshot as an array
 * of unsigned long and use the same index into this table, so the order
 * here must follow the order of the counters in that structure.
 *
 * The table is never written, so both the strings and the pointer array
 * are declared const.
 */
static const char * const vmstat_text[] = {
	"nr_dirty",
	"nr_writeback",
	"nr_unstable",
	"nr_page_table_pages",
	"nr_mapped",
	"nr_slab",

	"pgpgin",
	"pgpgout",
	"pswpin",
	"pswpout",
	"pgalloc_high",

	"pgalloc_normal",
	"pgalloc_dma",
	"pgfree",
	"pgactivate",
	"pgdeactivate",

	"pgfault",
	"pgmajfault",
	"pgrefill_high",
	"pgrefill_normal",
	"pgrefill_dma",

	"pgsteal_high",
	"pgsteal_normal",
	"pgsteal_dma",
	"pgscan_kswapd_high",
	"pgscan_kswapd_normal",

	"pgscan_kswapd_dma",
	"pgscan_direct_high",
	"pgscan_direct_normal",
	"pgscan_direct_dma",
	"pginodesteal",

	"slabs_scanned",
	"kswapd_steal",
	"kswapd_inodesteal",
	"pageoutrun",
	"allocstall",

	"pgrotated",
};
1770
1771static void *vmstat_start(struct seq_file *m, loff_t *pos)
1772{
1773 struct page_state *ps;
1774
1775 if (*pos >= ARRAY_SIZE(vmstat_text))
1776 return NULL;
1777
1778 ps = kmalloc(sizeof(*ps), GFP_KERNEL);
1779 m->private = ps;
1780 if (!ps)
1781 return ERR_PTR(-ENOMEM);
1782 get_full_page_state(ps);
1783 ps->pgpgin /= 2;
1784 ps->pgpgout /= 2;
1785 return (unsigned long *)ps + *pos;
1786}
1787
1788static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
1789{
1790 (*pos)++;
1791 if (*pos >= ARRAY_SIZE(vmstat_text))
1792 return NULL;
1793 return (unsigned long *)m->private + *pos;
1794}
1795
1796static int vmstat_show(struct seq_file *m, void *arg)
1797{
1798 unsigned long *l = arg;
1799 unsigned long off = l - (unsigned long *)m->private;
1800
1801 seq_printf(m, "%s %lu\n", vmstat_text[off], *l);
1802 return 0;
1803}
1804
1805static void vmstat_stop(struct seq_file *m, void *arg)
1806{
1807 kfree(m->private);
1808 m->private = NULL;
1809}
1810
1811struct seq_operations vmstat_op = {
1812 .start = vmstat_start,
1813 .next = vmstat_next,
1814 .stop = vmstat_stop,
1815 .show = vmstat_show,
1816};
1817
1818#endif
1819
1820#ifdef CONFIG_HOTPLUG_CPU
1821static int page_alloc_cpu_notify(struct notifier_block *self,
1822 unsigned long action, void *hcpu)
1823{
1824 int cpu = (unsigned long)hcpu;
1825 long *count;
1826
1827 if (action == CPU_DEAD) {
1828
1829 count = &per_cpu(nr_pagecache_local, cpu);
1830 atomic_add(*count, &nr_pagecache);
1831 *count = 0;
1832 local_irq_disable();
1833 __drain_pages(cpu);
1834 local_irq_enable();
1835 }
1836 return NOTIFY_OK;
1837}
1838#endif
1839
1840void __init page_alloc_init(void)
1841{
1842 hotcpu_notifier(page_alloc_cpu_notify, 0);
1843}
1844
1845static unsigned long higherzone_val(struct zone *z, int max_zone,
1846 int alloc_type)
1847{
1848 int z_idx = zone_idx(z);
1849 struct zone *higherzone;
1850 unsigned long pages;
1851
1852
1853 if (z_idx == MAX_NR_ZONES-1)
1854 return 0;
1855
1856 higherzone = &z->zone_pgdat->node_zones[z_idx+1];
1857
1858
1859 pages = higherzone->protection[alloc_type];
1860
1861
1862
1863
1864
1865
1866
1867
1868 if (higherzone->present_pages && z_idx < alloc_type)
1869 pages += higherzone->pages_low * sysctl_lower_zone_protection;
1870
1871 return pages;
1872}
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883static void setup_per_zone_protection(void)
1884{
1885 struct pglist_data *pgdat;
1886 struct zone *zones, *zone;
1887 int max_zone;
1888 int i, j;
1889
1890 for_each_pgdat(pgdat) {
1891 zones = pgdat->node_zones;
1892
1893 for (i = 0, max_zone = 0; i < MAX_NR_ZONES; i++)
1894 if (zones[i].present_pages)
1895 max_zone = i;
1896
1897
1898
1899
1900
1901 for (i = 0; i < GFP_ZONETYPES; i++) {
1902
1903
1904
1905
1906 for (j = MAX_NR_ZONES-1; j >= 0; j--) {
1907 zone = &zones[j];
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917 if (j > max_zone || j >= i) {
1918 zone->protection[i] = 0;
1919 continue;
1920 }
1921
1922
1923
1924 zone->protection[i] = higherzone_val(zone,
1925 max_zone, i);
1926 }
1927 }
1928 }
1929}
1930
1931
1932
1933
1934
1935
1936static void setup_per_zone_pages_min(void)
1937{
1938 unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10);
1939 unsigned long lowmem_pages = 0;
1940 struct zone *zone;
1941 unsigned long flags;
1942
1943
1944 for_each_zone(zone) {
1945 if (!is_highmem(zone))
1946 lowmem_pages += zone->present_pages;
1947 }
1948
1949 for_each_zone(zone) {
1950 spin_lock_irqsave(&zone->lru_lock, flags);
1951 if (is_highmem(zone)) {
1952
1953
1954
1955
1956
1957
1958 int min_pages;
1959
1960 min_pages = zone->present_pages / 1024;
1961 if (min_pages < SWAP_CLUSTER_MAX)
1962 min_pages = SWAP_CLUSTER_MAX;
1963 if (min_pages > 128)
1964 min_pages = 128;
1965 zone->pages_min = min_pages;
1966 } else {
1967
1968
1969
1970 zone->pages_min = (pages_min * zone->present_pages) /
1971 lowmem_pages;
1972 }
1973
1974 zone->pages_low = zone->pages_min * 2;
1975 zone->pages_high = zone->pages_min * 3;
1976 spin_unlock_irqrestore(&zone->lru_lock, flags);
1977 }
1978}
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004static int __init init_per_zone_pages_min(void)
2005{
2006 unsigned long lowmem_kbytes;
2007
2008 lowmem_kbytes = nr_free_buffer_pages() * (PAGE_SIZE >> 10);
2009
2010 min_free_kbytes = int_sqrt(lowmem_kbytes *16);
2011 if (min_free_kbytes < 128)
2012 min_free_kbytes = 128;
2013 if (min_free_kbytes > 65536)
2014 min_free_kbytes = 65536;
2015 setup_per_zone_pages_min();
2016 setup_per_zone_protection();
2017 return 0;
2018}
2019module_init(init_per_zone_pages_min)
2020
2021
2022
2023
2024
2025
2026int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
2027 struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
2028{
2029 proc_dointvec(table, write, file, buffer, length, ppos);
2030 setup_per_zone_pages_min();
2031 setup_per_zone_protection();
2032 return 0;
2033}
2034
2035
2036
2037
2038
2039
2040int lower_zone_protection_sysctl_handler(ctl_table *table, int write,
2041 struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
2042{
2043 proc_dointvec_minmax(table, write, file, buffer, length, ppos);
2044 setup_per_zone_protection();
2045 return 0;
2046}
2047
2048
2049
2050
2051
2052
2053
2054int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
2055 struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
2056{
2057 struct zone *zone;
2058 unsigned int cpu;
2059 int ret;
2060
2061 ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
2062 if (!write || (ret == -EINVAL))
2063 return ret;
2064 for_each_zone(zone) {
2065 for_each_online_cpu(cpu) {
2066 unsigned long high;
2067 high = zone->present_pages / percpu_pagelist_fraction;
2068 setup_pagelist_highmark(&zone->pageset[cpu], high);
2069 }
2070 }
2071 return 0;
2072}
2073
2074
2075
2076
2077
2078
2079void *__init alloc_large_system_hash(const char *tablename,
2080 unsigned long bucketsize,
2081 unsigned long numentries,
2082 int scale,
2083 int consider_highmem,
2084 unsigned int *_hash_shift,
2085 unsigned int *_hash_mask)
2086{
2087 unsigned long long max;
2088 unsigned long log2qty, size;
2089 void *table;
2090
2091
2092 if (!numentries) {
2093
2094 numentries = consider_highmem ? nr_all_pages : nr_kernel_pages;
2095 numentries += (1UL << (20 - PAGE_SHIFT)) - 1;
2096 numentries >>= 20 - PAGE_SHIFT;
2097 numentries <<= 20 - PAGE_SHIFT;
2098
2099
2100 if (scale > PAGE_SHIFT)
2101 numentries >>= (scale - PAGE_SHIFT);
2102 else
2103 numentries <<= (PAGE_SHIFT - scale);
2104 }
2105
2106 numentries = 1UL << (long_log2(numentries) + 1);
2107
2108
2109 max = ((unsigned long long)nr_all_pages << PAGE_SHIFT) >> 4;
2110 do_div(max, bucketsize);
2111
2112 if (numentries > max)
2113 numentries = max;
2114
2115 log2qty = long_log2(numentries);
2116
2117 do {
2118 size = bucketsize << log2qty;
2119 table = alloc_bootmem(size);
2120 } while (!table && size > PAGE_SIZE && --log2qty);
2121
2122 if (!table)
2123 panic("Failed to allocate %s hash table\n", tablename);
2124
2125 printk("%s hash table entries: %d (order: %d, %lu bytes)\n",
2126 tablename,
2127 (1U << log2qty),
2128 long_log2(size) - PAGE_SHIFT,
2129 size);
2130
2131 if (_hash_shift)
2132 *_hash_shift = log2qty;
2133 if (_hash_mask)
2134 *_hash_mask = (1 << log2qty) - 1;
2135
2136 return table;
2137}
2138