1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47#include <linux/mm.h>
48#include <linux/pagemap.h>
49#include <linux/swap.h>
50#include <linux/swapops.h>
51#include <linux/slab.h>
52#include <linux/init.h>
53#include <linux/rmap.h>
54#include <linux/rcupdate.h>
55
56#include <asm/tlbflush.h>
57
/*
 * Bookkeeping for mapped pages that live in a highmem zone.
 * When CONFIG_HIGHMEM is not set both helpers expand to nothing.
 */
#ifndef CONFIG_HIGHMEM
#define inc_mapped_high(page)
#define dec_mapped_high(page)
#else
extern atomic_t nr_mapped_high;

/* Increment nr_mapped_high if @page belongs to a highmem zone. */
static inline void inc_mapped_high(struct page *page)
{
	if (!is_highmem(page_zone(page)))
		return;
	atomic_inc(&nr_mapped_high);
}

/* Decrement nr_mapped_high if @page belongs to a highmem zone. */
static inline void dec_mapped_high(struct page *page)
{
	if (!is_highmem(page_zone(page)))
		return;
	atomic_dec(&nr_mapped_high);
}
#endif
75
76
77
78kmem_cache_t *anon_vma_cachep;
79
80static inline void validate_anon_vma(struct vm_area_struct *find_vma)
81{
82#ifdef RMAP_DEBUG
83 struct anon_vma *anon_vma = find_vma->anon_vma;
84 struct vm_area_struct *vma;
85 unsigned int mapcount = 0;
86 int found = 0;
87
88 list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
89 mapcount++;
90 BUG_ON(mapcount > 100000);
91 if (vma == find_vma)
92 found = 1;
93 }
94 BUG_ON(!found);
95#endif
96}
97
98
99int anon_vma_prepare(struct vm_area_struct *vma)
100{
101 struct anon_vma *anon_vma = vma->anon_vma;
102
103 might_sleep();
104 if (unlikely(!anon_vma)) {
105 struct mm_struct *mm = vma->vm_mm;
106 struct anon_vma *allocated, *locked;
107
108 anon_vma = find_mergeable_anon_vma(vma);
109 if (anon_vma) {
110 allocated = NULL;
111 locked = anon_vma;
112 spin_lock(&locked->lock);
113 } else {
114 anon_vma = anon_vma_alloc();
115 if (unlikely(!anon_vma))
116 return -ENOMEM;
117 allocated = anon_vma;
118 locked = NULL;
119 }
120
121
122 spin_lock(&mm->page_table_lock);
123 if (likely(!vma->anon_vma)) {
124 vma->anon_vma = anon_vma;
125 list_add(&vma->anon_vma_node, &anon_vma->head);
126 allocated = NULL;
127 }
128 spin_unlock(&mm->page_table_lock);
129
130 if (locked)
131 spin_unlock(&locked->lock);
132 if (unlikely(allocated))
133 anon_vma_free(allocated);
134 }
135 return 0;
136}
137
138void __anon_vma_merge(struct vm_area_struct *vma, struct vm_area_struct *next)
139{
140 if (!vma->anon_vma) {
141 BUG_ON(!next->anon_vma);
142 vma->anon_vma = next->anon_vma;
143 list_add(&vma->anon_vma_node, &next->anon_vma_node);
144 } else {
145
146 BUG_ON(vma->anon_vma != next->anon_vma);
147 }
148 list_del(&next->anon_vma_node);
149}
150
151void __anon_vma_link(struct vm_area_struct *vma)
152{
153 struct anon_vma *anon_vma = vma->anon_vma;
154
155 if (anon_vma) {
156 list_add(&vma->anon_vma_node, &anon_vma->head);
157 validate_anon_vma(vma);
158 }
159}
160
161void anon_vma_link(struct vm_area_struct *vma)
162{
163 struct anon_vma *anon_vma = vma->anon_vma;
164
165 if (anon_vma) {
166 spin_lock(&anon_vma->lock);
167 list_add(&vma->anon_vma_node, &anon_vma->head);
168 validate_anon_vma(vma);
169 spin_unlock(&anon_vma->lock);
170 }
171}
172
173void anon_vma_unlink(struct vm_area_struct *vma)
174{
175 struct anon_vma *anon_vma = vma->anon_vma;
176 int empty;
177
178 if (!anon_vma)
179 return;
180
181 spin_lock(&anon_vma->lock);
182 validate_anon_vma(vma);
183 list_del(&vma->anon_vma_node);
184
185
186 empty = list_empty(&anon_vma->head);
187 spin_unlock(&anon_vma->lock);
188
189 if (empty)
190 anon_vma_free(anon_vma);
191}
192
193static void anon_vma_ctor(void *data, kmem_cache_t *cachep, unsigned long flags)
194{
195 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
196 SLAB_CTOR_CONSTRUCTOR) {
197 struct anon_vma *anon_vma = data;
198
199 spin_lock_init(&anon_vma->lock);
200 INIT_LIST_HEAD(&anon_vma->head);
201 }
202}
203
204void __init anon_vma_init(void)
205{
206 anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
207 0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor, NULL);
208}
209
210
211
212
213
214static struct anon_vma *page_lock_anon_vma(struct page *page)
215{
216 struct anon_vma *anon_vma = NULL;
217 unsigned long anon_mapping;
218
219 rcu_read_lock();
220 anon_mapping = (unsigned long) page->mapping;
221 if (!(anon_mapping & PAGE_MAPPING_ANON))
222 goto out;
223 if (!page_mapped(page))
224 goto out;
225
226 anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
227 spin_lock(&anon_vma->lock);
228out:
229 rcu_read_unlock();
230 return anon_vma;
231}
232
233
234
235
236static inline unsigned long
237vma_address(struct page *page, struct vm_area_struct *vma)
238{
239 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
240 unsigned long address;
241
242 address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
243 if (unlikely(address < vma->vm_start || address >= vma->vm_end)) {
244
245 BUG_ON(!PageAnon(page));
246 return -EFAULT;
247 }
248 return address;
249}
250
251
252
253
254
255unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
256{
257 if (PageAnon(page)) {
258 if ((void *)vma->anon_vma !=
259 (void *)page->mapping - PAGE_MAPPING_ANON)
260 return -EFAULT;
261 } else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) {
262 if (vma->vm_file->f_mapping != page->mapping)
263 return -EFAULT;
264 } else
265 return -EFAULT;
266 return vma_address(page, vma);
267}
268
269
270
271
272
273static int page_referenced_one(struct page *page,
274 struct vm_area_struct *vma, unsigned int *mapcount, int ignore_token)
275{
276 struct mm_struct *mm = vma->vm_mm;
277 unsigned long address;
278 pgd_t *pgd;
279 pmd_t *pmd;
280 pte_t *pte;
281 int referenced = 0;
282
283 if (!mm->rss)
284 goto out;
285 address = vma_address(page, vma);
286 if (address == -EFAULT)
287 goto out;
288
289 spin_lock(&mm->page_table_lock);
290
291 pgd = pgd_offset(mm, address);
292 if (!pgd_present(*pgd))
293 goto out_unlock;
294
295 pmd = pmd_offset(pgd, address);
296 if (!pmd_present(*pmd))
297 goto out_unlock;
298
299 pte = pte_offset_map(pmd, address);
300 if (!pte_present(*pte))
301 goto out_unmap;
302
303 if (page_to_pfn(page) != pte_pfn(*pte))
304 goto out_unmap;
305
306 if (ptep_clear_flush_young(vma, address, pte))
307 referenced++;
308
309 if (mm != current->mm && !ignore_token && has_swap_token(mm))
310 referenced++;
311
312 (*mapcount)--;
313
314out_unmap:
315 pte_unmap(pte);
316out_unlock:
317 spin_unlock(&mm->page_table_lock);
318out:
319 return referenced;
320}
321
322static int page_referenced_anon(struct page *page, int ignore_token)
323{
324 unsigned int mapcount;
325 struct anon_vma *anon_vma;
326 struct vm_area_struct *vma;
327 int referenced = 0;
328
329 anon_vma = page_lock_anon_vma(page);
330 if (!anon_vma)
331 return referenced;
332
333 mapcount = page_mapcount(page);
334 list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
335 referenced += page_referenced_one(page, vma, &mapcount,
336 ignore_token);
337 if (!mapcount)
338 break;
339 }
340 spin_unlock(&anon_vma->lock);
341 return referenced;
342}
343
344
345
346
347
348
349
350
351
352
353
354
355static int page_referenced_file(struct page *page, int ignore_token)
356{
357 unsigned int mapcount;
358 struct address_space *mapping = page->mapping;
359 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
360 struct vm_area_struct *vma;
361 struct prio_tree_iter iter;
362 int referenced = 0;
363
364
365
366
367
368
369 BUG_ON(PageAnon(page));
370
371
372
373
374
375
376
377 BUG_ON(!PageLocked(page));
378
379 spin_lock(&mapping->i_mmap_lock);
380
381
382
383
384
385 mapcount = page_mapcount(page);
386
387 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
388 if ((vma->vm_flags & (VM_LOCKED|VM_MAYSHARE))
389 == (VM_LOCKED|VM_MAYSHARE)) {
390 referenced++;
391 break;
392 }
393 referenced += page_referenced_one(page, vma, &mapcount,
394 ignore_token);
395 if (!mapcount)
396 break;
397 }
398
399 spin_unlock(&mapping->i_mmap_lock);
400 return referenced;
401}
402
403
404
405
406
407
408
409
410
411int page_referenced(struct page *page, int is_locked, int ignore_token)
412{
413 int referenced = 0;
414
415 if (!swap_token_default_timeout)
416 ignore_token = 1;
417
418 if (page_test_and_clear_young(page))
419 referenced++;
420
421 if (TestClearPageReferenced(page))
422 referenced++;
423
424 if (page_mapped(page) && page->mapping) {
425 if (PageAnon(page))
426 referenced += page_referenced_anon(page, ignore_token);
427 else if (is_locked)
428 referenced += page_referenced_file(page, ignore_token);
429 else if (TestSetPageLocked(page))
430 referenced++;
431 else {
432 if (page->mapping)
433 referenced += page_referenced_file(page,
434 ignore_token);
435 unlock_page(page);
436 }
437 }
438 return referenced;
439}
440
441
442
443
444
445
446
447
448
449void page_add_anon_rmap(struct page *page,
450 struct vm_area_struct *vma, unsigned long address)
451{
452 struct anon_vma *anon_vma = vma->anon_vma;
453 pgoff_t index;
454
455 BUG_ON(PageReserved(page));
456 BUG_ON(!anon_vma);
457
458 vma->vm_mm->anon_rss++;
459
460 anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
461 index = (address - vma->vm_start) >> PAGE_SHIFT;
462 index += vma->vm_pgoff;
463 index >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
464
465 if (atomic_inc_and_test(&page->_mapcount)) {
466 page->index = index;
467 page->mapping = (struct address_space *) anon_vma;
468 inc_page_state(nr_mapped);
469 inc_mapped_high(page);
470 }
471
472}
473
474
475
476
477
478
479
480void page_add_file_rmap(struct page *page)
481{
482 BUG_ON(PageAnon(page));
483 if (!pfn_valid(page_to_pfn(page)) || PageReserved(page))
484 return;
485
486 if (atomic_inc_and_test(&page->_mapcount)) {
487 inc_page_state(nr_mapped);
488 inc_mapped_high(page);
489 }
490}
491
492
493
494
495
496
497
498void page_remove_rmap(struct page *page)
499{
500 BUG_ON(PageReserved(page));
501
502 if (atomic_add_negative(-1, &page->_mapcount)) {
503 if (unlikely(page_mapcount(page) < 0)) {
504 printk (KERN_EMERG "Eeek! page_mapcount(page) went negative! (%d)\n", page_mapcount(page));
505 printk (KERN_EMERG " page->flags = %lx\n", page->flags);
506 printk (KERN_EMERG " page->count = %x\n", page_count(page));
507 printk (KERN_EMERG " page->mapping = %p\n", page->mapping);
508 BUG();
509 }
510
511
512
513
514
515
516
517
518
519 if (page_test_and_clear_dirty(page))
520 set_page_dirty(page);
521 dec_page_state(nr_mapped);
522 dec_mapped_high(page);
523
524
525
526
527 if (pagecache_over_max() && !PageAnon(page))
528 deactivate_unmapped_page(page);
529 }
530}
531
532
533
534
535
536static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
537{
538 struct mm_struct *mm = vma->vm_mm;
539 unsigned long address;
540 pgd_t *pgd;
541 pmd_t *pmd;
542 pte_t *pte;
543 pte_t pteval;
544 int ret = SWAP_AGAIN;
545
546 if (!mm->rss)
547 goto out;
548 address = vma_address(page, vma);
549 if (address == -EFAULT)
550 goto out;
551
552
553
554
555
556 spin_lock(&mm->page_table_lock);
557
558 pgd = pgd_offset(mm, address);
559 if (!pgd_present(*pgd))
560 goto out_unlock;
561
562 pmd = pmd_offset(pgd, address);
563 if (!pmd_present(*pmd))
564 goto out_unlock;
565
566 pte = pte_offset_map(pmd, address);
567 if (!pte_present(*pte))
568 goto out_unmap;
569
570 if (page_to_pfn(page) != pte_pfn(*pte))
571 goto out_unmap;
572
573
574
575
576
577
578 if ((vma->vm_flags & (VM_LOCKED|VM_RESERVED)) ||
579 ptep_clear_flush_young(vma, address, pte)) {
580 ret = SWAP_FAIL;
581 goto out_unmap;
582 }
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599 if (PageSwapCache(page) &&
600 page_count(page) != page_mapcount(page) + 2) {
601 ret = SWAP_FAIL;
602 goto out_unmap;
603 }
604
605
606 flush_cache_page(vma, address);
607 pteval = ptep_clear_flush(vma, address, pte);
608
609
610 if (pte_dirty(pteval))
611 set_page_dirty(page);
612
613 if (PageAnon(page)) {
614 swp_entry_t entry = { .val = page->private };
615
616
617
618
619 BUG_ON(!PageSwapCache(page));
620 swap_duplicate(entry);
621 set_pte(pte, swp_entry_to_pte(entry));
622 BUG_ON(pte_file(*pte));
623 mm->anon_rss--;
624 }
625
626 mm->rss--;
627 page_remove_rmap(page);
628 page_cache_release(page);
629
630out_unmap:
631 pte_unmap(pte);
632out_unlock:
633 spin_unlock(&mm->page_table_lock);
634out:
635 return ret;
636}
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657#define CLUSTER_SIZE min(32*PAGE_SIZE, PMD_SIZE)
658#define CLUSTER_MASK (~(CLUSTER_SIZE - 1))
659
660static void try_to_unmap_cluster(unsigned long cursor,
661 unsigned int *mapcount, struct vm_area_struct *vma)
662{
663 struct mm_struct *mm = vma->vm_mm;
664 pgd_t *pgd;
665 pmd_t *pmd;
666 pte_t *pte, *original_pte;
667 pte_t pteval;
668 struct page *page;
669 unsigned long address;
670 unsigned long end;
671 unsigned long pfn;
672
673
674
675
676
677 spin_lock(&mm->page_table_lock);
678
679 address = (vma->vm_start + cursor) & CLUSTER_MASK;
680 end = address + CLUSTER_SIZE;
681 if (address < vma->vm_start)
682 address = vma->vm_start;
683 if (end > vma->vm_end)
684 end = vma->vm_end;
685
686 pgd = pgd_offset(mm, address);
687 if (!pgd_present(*pgd))
688 goto out_unlock;
689
690 pmd = pmd_offset(pgd, address);
691 if (!pmd_present(*pmd))
692 goto out_unlock;
693
694 for (original_pte = pte = pte_offset_map(pmd, address);
695 address < end; pte++, address += PAGE_SIZE) {
696
697 if (!pte_present(*pte))
698 continue;
699
700 pfn = pte_pfn(*pte);
701 if (!pfn_valid(pfn))
702 continue;
703
704 page = pfn_to_page(pfn);
705 BUG_ON(PageAnon(page));
706 if (PageReserved(page))
707 continue;
708
709 if (ptep_clear_flush_young(vma, address, pte))
710 continue;
711
712
713 flush_cache_page(vma, address);
714 pteval = ptep_clear_flush(vma, address, pte);
715
716
717 if (page->index != linear_page_index(vma, address))
718 set_pte(pte, pgoff_to_pte(page->index));
719
720
721 if (pte_dirty(pteval))
722 set_page_dirty(page);
723
724 page_remove_rmap(page);
725 page_cache_release(page);
726 mm->rss--;
727 (*mapcount)--;
728 }
729
730 pte_unmap(original_pte);
731
732out_unlock:
733 spin_unlock(&mm->page_table_lock);
734}
735
736static int try_to_unmap_anon(struct page *page)
737{
738 struct anon_vma *anon_vma;
739 struct vm_area_struct *vma;
740 int ret = SWAP_AGAIN;
741
742 anon_vma = page_lock_anon_vma(page);
743 if (!anon_vma)
744 return ret;
745
746 list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
747 ret = try_to_unmap_one(page, vma);
748 if (ret == SWAP_FAIL || !page_mapped(page))
749 break;
750 }
751 spin_unlock(&anon_vma->lock);
752 return ret;
753}
754
755
756
757
758
759
760
761
762
763
764static int try_to_unmap_file(struct page *page)
765{
766 struct address_space *mapping = page->mapping;
767 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
768 struct vm_area_struct *vma;
769 struct prio_tree_iter iter;
770 int ret = SWAP_AGAIN;
771 unsigned long cursor;
772 unsigned long max_nl_cursor = 0;
773 unsigned long max_nl_size = 0;
774 unsigned int mapcount;
775
776 spin_lock(&mapping->i_mmap_lock);
777 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
778 ret = try_to_unmap_one(page, vma);
779 if (ret == SWAP_FAIL || !page_mapped(page))
780 goto out;
781 }
782
783 if (list_empty(&mapping->i_mmap_nonlinear))
784 goto out;
785
786 list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
787 shared.vm_set.list) {
788 if (vma->vm_flags & (VM_LOCKED|VM_RESERVED))
789 continue;
790 cursor = (unsigned long) vma->vm_private_data;
791 if (cursor > max_nl_cursor)
792 max_nl_cursor = cursor;
793 cursor = vma->vm_end - vma->vm_start;
794 if (cursor > max_nl_size)
795 max_nl_size = cursor;
796 }
797
798 if (max_nl_size == 0) {
799 ret = SWAP_FAIL;
800 goto out;
801 }
802
803
804
805
806
807
808
809
810 mapcount = page_mapcount(page);
811 if (!mapcount)
812 goto out;
813 cond_resched_lock(&mapping->i_mmap_lock);
814
815 max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK;
816 if (max_nl_cursor == 0)
817 max_nl_cursor = CLUSTER_SIZE;
818
819 do {
820 list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
821 shared.vm_set.list) {
822 if (vma->vm_flags & (VM_LOCKED|VM_RESERVED))
823 continue;
824 cursor = (unsigned long) vma->vm_private_data;
825 while (vma->vm_mm->rss &&
826 cursor < max_nl_cursor &&
827 cursor < vma->vm_end - vma->vm_start) {
828 try_to_unmap_cluster(cursor, &mapcount, vma);
829 cursor += CLUSTER_SIZE;
830 vma->vm_private_data = (void *) cursor;
831 if ((int)mapcount <= 0)
832 goto out;
833 }
834 vma->vm_private_data = (void *) max_nl_cursor;
835 }
836 cond_resched_lock(&mapping->i_mmap_lock);
837 max_nl_cursor += CLUSTER_SIZE;
838 } while (max_nl_cursor <= max_nl_size);
839
840
841
842
843
844
845 list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
846 shared.vm_set.list) {
847 if (!(vma->vm_flags & VM_RESERVED))
848 vma->vm_private_data = NULL;
849 }
850out:
851 spin_unlock(&mapping->i_mmap_lock);
852 return ret;
853}
854
855
856
857
858
859
860
861
862
863
864
865
866
867int try_to_unmap(struct page *page)
868{
869 int ret;
870
871 BUG_ON(PageReserved(page));
872 BUG_ON(!PageLocked(page));
873
874 if (PageAnon(page))
875 ret = try_to_unmap_anon(page);
876 else
877 ret = try_to_unmap_file(page);
878
879 if (!page_mapped(page))
880 ret = SWAP_SUCCESS;
881 return ret;
882}
883