1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39#include <linux/kernel_stat.h>
40#include <linux/mm.h>
41#include <linux/hugetlb.h>
42#include <linux/mman.h>
43#include <linux/swap.h>
44#include <linux/highmem.h>
45#include <linux/pagemap.h>
46#include <linux/rmap.h>
47#include <linux/module.h>
48#include <linux/init.h>
49
50#include <asm/pgalloc.h>
51#include <asm/uaccess.h>
52#include <asm/tlb.h>
53#include <asm/tlbflush.h>
54#include <asm/pgtable.h>
55
56#include <linux/swapops.h>
57#include <linux/elf.h>
58
#ifndef CONFIG_DISCONTIGMEM
/* These two are only defined here when CONFIG_DISCONTIGMEM is not set. */
unsigned long max_mapnr;
EXPORT_SYMBOL(max_mapnr);

struct page *mem_map;
EXPORT_SYMBOL(mem_map);
#endif

/*
 * Memory-layout globals defined by this file and exported to modules.
 * Each definition is immediately followed by its export.
 */
unsigned long num_physpages;
EXPORT_SYMBOL(num_physpages);

void *high_memory;
EXPORT_SYMBOL(high_memory);

struct page *highmem_start_page;
EXPORT_SYMBOL(highmem_start_page);

unsigned long vmalloc_earlyreserve;
EXPORT_SYMBOL(vmalloc_earlyreserve);
84
85
86
87
88
89
90
91void pgd_clear_bad(pgd_t *pgd)
92{
93 pgd_ERROR(*pgd);
94 pgd_clear(pgd);
95}
96
97void pmd_clear_bad(pmd_t *pmd)
98{
99 pmd_ERROR(*pmd);
100 pmd_clear(pmd);
101}
102
103
104
105
106
107
/*
 * copy_cow_page - fill @to with the contents of @from.
 * @from:    source page
 * @to:      destination page
 * @address: user virtual address the page is mapped at
 *
 * When the source is the zero page for @address, the destination is
 * cleared instead of copied.
 */
static inline void copy_cow_page(struct page * from, struct page * to, unsigned long address)
{
	if (from != ZERO_PAGE(address))
		copy_user_highpage(to, from, address);
	else
		clear_user_highpage(to, address);
}
116
117
118
119
120
121static inline void free_one_pmd(struct mmu_gather *tlb, pmd_t * dir)
122{
123 struct page *page;
124
125 if (pmd_none(*dir))
126 return;
127 if (unlikely(pmd_bad(*dir))) {
128 pmd_ERROR(*dir);
129 pmd_clear(dir);
130 return;
131 }
132 page = pmd_page(*dir);
133 pmd_clear(dir);
134 dec_page_state(nr_page_table_pages);
135 pte_free_tlb(tlb, page);
136}
137
138static inline void free_one_pgd(struct mmu_gather *tlb, pgd_t * dir,
139 int pgd_idx)
140{
141 int j;
142 pmd_t * pmd;
143
144 if (pgd_none(*dir))
145 return;
146 if (unlikely(pgd_bad(*dir))) {
147 pgd_ERROR(*dir);
148 pgd_clear(dir);
149 return;
150 }
151 pmd = pmd_offset(dir, 0);
152 pgd_clear(dir);
153 for (j = 0; j < PTRS_PER_PMD ; j++) {
154 if (pgd_idx * PGDIR_SIZE + j * PMD_SIZE >= MM_VM_SIZE(tlb->mm))
155 break;
156 free_one_pmd(tlb, pmd+j);
157 }
158 pmd_free_tlb(tlb, pmd);
159}
160
161
162
163
164
165
166
167void clear_page_tables(struct mmu_gather *tlb, unsigned long first, int nr)
168{
169 pgd_t * page_dir = tlb->mm->pgd;
170 int pgd_idx = first;
171
172 page_dir += first;
173 do {
174 free_one_pgd(tlb, page_dir, pgd_idx);
175 page_dir++;
176 pgd_idx++;
177 } while (--nr);
178}
179
180pte_t fastcall * pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
181{
182 if (!pmd_present(*pmd)) {
183 struct page *new;
184
185 spin_unlock(&mm->page_table_lock);
186 new = pte_alloc_one(mm, address);
187 spin_lock(&mm->page_table_lock);
188 if (!new)
189 return NULL;
190
191
192
193
194
195 if (pmd_present(*pmd)) {
196 pte_free(new);
197 goto out;
198 }
199 inc_page_state(nr_page_table_pages);
200 pmd_populate(mm, pmd, new);
201 }
202out:
203 return pte_offset_map(pmd, address);
204}
205
206pte_t fastcall * pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
207{
208 if (!pmd_present(*pmd)) {
209 pte_t *new;
210
211 spin_unlock(&mm->page_table_lock);
212 new = pte_alloc_one_kernel(mm, address);
213 spin_lock(&mm->page_table_lock);
214 if (!new)
215 return NULL;
216
217
218
219
220
221 if (pmd_present(*pmd)) {
222 pte_free_kernel(new);
223 goto out;
224 }
225 pmd_populate_kernel(mm, pmd, new);
226 }
227out:
228 return pte_offset_kernel(pmd, address);
229}
/*
 * Byte offset of the last entry in a PTE / PMD table. copy_page_range()
 * ANDs these with an entry pointer to decide whether its loops continue.
 */
#define PTE_TABLE_MASK	(sizeof(pte_t) * (PTRS_PER_PTE - 1))
#define PMD_TABLE_MASK	(sizeof(pmd_t) * (PTRS_PER_PMD - 1))
232
233
234
235
236
237
238
239
240
241
242
243
244int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
245 struct vm_area_struct *vma)
246{
247 pgd_t * src_pgd, * dst_pgd;
248 unsigned long address = vma->vm_start;
249 unsigned long end = vma->vm_end;
250 unsigned long cow;
251
252
253
254
255
256
257
258 if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_RESERVED))) {
259 if (!vma->anon_vma)
260 return 0;
261 }
262
263 if (is_vm_hugetlb_page(vma))
264 return copy_hugetlb_page_range(dst, src, vma);
265
266 cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
267 src_pgd = pgd_offset(src, address)-1;
268 dst_pgd = pgd_offset(dst, address)-1;
269
270 for (;;) {
271 pmd_t * src_pmd, * dst_pmd;
272
273 src_pgd++; dst_pgd++;
274
275
276
277 if (pgd_none(*src_pgd))
278 goto skip_copy_pmd_range;
279 if (unlikely(pgd_bad(*src_pgd))) {
280 pgd_ERROR(*src_pgd);
281 pgd_clear(src_pgd);
282skip_copy_pmd_range: address = (address + PGDIR_SIZE) & PGDIR_MASK;
283 if (!address || (address >= end))
284 goto out;
285 continue;
286 }
287
288 src_pmd = pmd_offset(src_pgd, address);
289 dst_pmd = pmd_alloc(dst, dst_pgd, address);
290 if (!dst_pmd)
291 goto nomem;
292
293 do {
294 pte_t * src_pte, * dst_pte;
295
296
297
298 if (pmd_none(*src_pmd))
299 goto skip_copy_pte_range;
300 if (unlikely(pmd_bad(*src_pmd))) {
301 pmd_ERROR(*src_pmd);
302 pmd_clear(src_pmd);
303skip_copy_pte_range:
304 address = (address + PMD_SIZE) & PMD_MASK;
305 if (address >= end)
306 goto out;
307 goto cont_copy_pmd_range;
308 }
309
310 dst_pte = pte_alloc_map(dst, dst_pmd, address);
311 if (!dst_pte)
312 goto nomem;
313 spin_lock(&src->page_table_lock);
314 src_pte = pte_offset_map_nested(src_pmd, address);
315 do {
316 pte_t pte = *src_pte;
317 struct page *page;
318 unsigned long pfn;
319
320
321
322 if (pte_none(pte))
323 goto cont_copy_pte_range_noset;
324
325 if (!pte_present(pte)) {
326 if (!pte_file(pte))
327 swap_duplicate(pte_to_swp_entry(pte));
328 set_pte(dst_pte, pte);
329 goto cont_copy_pte_range_noset;
330 }
331 pfn = pte_pfn(pte);
332
333
334
335
336
337 page = NULL;
338 if (pfn_valid(pfn))
339 page = pfn_to_page(pfn);
340
341 if (!page || PageReserved(page)) {
342 set_pte(dst_pte, pte);
343 goto cont_copy_pte_range_noset;
344 }
345
346
347
348
349
350 if (cow) {
351 ptep_set_wrprotect(src_pte);
352 pte = *src_pte;
353 }
354
355
356
357
358
359 if (vma->vm_flags & VM_SHARED)
360 pte = pte_mkclean(pte);
361 pte = pte_mkold(pte);
362 get_page(page);
363 dst->rss++;
364 if (PageAnon(page))
365 dst->anon_rss++;
366 set_pte(dst_pte, pte);
367 page_dup_rmap(page);
368cont_copy_pte_range_noset:
369 address += PAGE_SIZE;
370 if (address >= end) {
371 pte_unmap_nested(src_pte);
372 pte_unmap(dst_pte);
373 goto out_unlock;
374 }
375 src_pte++;
376 dst_pte++;
377 } while ((unsigned long)src_pte & PTE_TABLE_MASK);
378 pte_unmap_nested(src_pte-1);
379 pte_unmap(dst_pte-1);
380 spin_unlock(&src->page_table_lock);
381 cond_resched_lock(&dst->page_table_lock);
382cont_copy_pmd_range:
383 src_pmd++;
384 dst_pmd++;
385 } while ((unsigned long)src_pmd & PMD_TABLE_MASK);
386 }
387out_unlock:
388 spin_unlock(&src->page_table_lock);
389out:
390 return 0;
391nomem:
392 return -ENOMEM;
393}
394
395static void zap_pte_range(struct mmu_gather *tlb,
396 pmd_t *pmd, unsigned long address,
397 unsigned long size, struct zap_details *details)
398{
399 unsigned long offset;
400 pte_t *ptep;
401
402 if (pmd_none(*pmd))
403 return;
404 if (unlikely(pmd_bad(*pmd))) {
405 pmd_ERROR(*pmd);
406 pmd_clear(pmd);
407 return;
408 }
409 ptep = pte_offset_map(pmd, address);
410 offset = address & ~PMD_MASK;
411 if (offset + size > PMD_SIZE)
412 size = PMD_SIZE - offset;
413 size &= PAGE_MASK;
414 if (details && !details->check_mapping && !details->nonlinear_vma)
415 details = NULL;
416 for (offset=0; offset < size; ptep++, offset += PAGE_SIZE) {
417 pte_t pte = *ptep;
418 if (pte_none(pte))
419 continue;
420 if (pte_present(pte)) {
421 struct page *page = NULL;
422 unsigned long pfn = pte_pfn(pte);
423 if (pfn_valid(pfn)) {
424 page = pfn_to_page(pfn);
425 if (PageReserved(page))
426 page = NULL;
427 }
428 if (unlikely(details) && page) {
429
430
431
432
433
434 if (details->check_mapping &&
435 details->check_mapping != page->mapping)
436 continue;
437
438
439
440
441 if (details->nonlinear_vma &&
442 (page->index < details->first_index ||
443 page->index > details->last_index))
444 continue;
445 }
446 pte = ptep_get_and_clear(ptep);
447 tlb_remove_tlb_entry(tlb, ptep, address+offset);
448 if (unlikely(!page))
449 continue;
450 if (unlikely(details) && details->nonlinear_vma
451 && linear_page_index(details->nonlinear_vma,
452 address+offset) != page->index)
453 set_pte(ptep, pgoff_to_pte(page->index));
454 if (pte_dirty(pte))
455 set_page_dirty(page);
456 if (PageAnon(page))
457 tlb->mm->anon_rss--;
458 else if (pte_young(pte))
459 mark_page_accessed(page);
460 tlb->freed++;
461 page_remove_rmap(page);
462 tlb_remove_page(tlb, page);
463 continue;
464 }
465
466
467
468
469 if (unlikely(details))
470 continue;
471 if (!pte_file(pte))
472 free_swap_and_cache(pte_to_swp_entry(pte));
473 pte_clear(ptep);
474 }
475 pte_unmap(ptep-1);
476}
477
478static void zap_pmd_range(struct mmu_gather *tlb,
479 pgd_t * dir, unsigned long address,
480 unsigned long size, struct zap_details *details)
481{
482 pmd_t * pmd;
483 unsigned long end, pgd_boundary;
484
485 if (pgd_none(*dir))
486 return;
487 if (unlikely(pgd_bad(*dir))) {
488 pgd_ERROR(*dir);
489 pgd_clear(dir);
490 return;
491 }
492 pmd = pmd_offset(dir, address);
493 end = address + size;
494 pgd_boundary = ((address + PGDIR_SIZE) & PGDIR_MASK);
495 if (pgd_boundary && (end > pgd_boundary))
496 end = pgd_boundary;
497 do {
498 zap_pte_range(tlb, pmd, address, end - address, details);
499 address = (address + PMD_SIZE) & PMD_MASK;
500 pmd++;
501 } while (address && (address < end));
502}
503
504static void unmap_page_range(struct mmu_gather *tlb,
505 struct vm_area_struct *vma, unsigned long address,
506 unsigned long end, struct zap_details *details)
507{
508 pgd_t * dir;
509
510 BUG_ON(address >= end);
511 dir = pgd_offset(vma->vm_mm, address);
512 tlb_start_vma(tlb, vma);
513 do {
514 zap_pmd_range(tlb, dir, address, end - address, details);
515 address = (address + PGDIR_SIZE) & PGDIR_MASK;
516 dir++;
517 } while (address && (address < end));
518 tlb_end_vma(tlb, vma);
519}
520
/*
 * ZAP_BLOCK_SIZE: number of bytes unmap_vmas() zaps per batch.
 * The value depends on the preemption / SMP configuration.
 */
#if defined(CONFIG_PREEMPT_VOLUNTARY)
# define ZAP_BLOCK_SIZE	(128 * PAGE_SIZE)
#elif !defined(CONFIG_PREEMPT)
# define ZAP_BLOCK_SIZE	(1024 * PAGE_SIZE)
#elif defined(CONFIG_SMP)
# define ZAP_BLOCK_SIZE	(FREE_PTE_NR * PAGE_SIZE)
#else
# define ZAP_BLOCK_SIZE	(256 * PAGE_SIZE)
#endif
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569int unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
570 struct vm_area_struct *vma, unsigned long start_addr,
571 unsigned long end_addr, unsigned long *nr_accounted,
572 struct zap_details *details)
573{
574 unsigned long zap_bytes = ZAP_BLOCK_SIZE;
575 unsigned long tlb_start = 0;
576 int tlb_start_valid = 0;
577 int ret = 0;
578 int atomic = details && details->atomic;
579
580 for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) {
581 unsigned long start;
582 unsigned long end;
583
584 start = max(vma->vm_start, start_addr);
585 if (start >= vma->vm_end)
586 continue;
587 end = min(vma->vm_end, end_addr);
588 if (end <= vma->vm_start)
589 continue;
590
591 if (vma->vm_flags & VM_ACCOUNT)
592 *nr_accounted += (end - start) >> PAGE_SHIFT;
593
594 ret++;
595 while (start != end) {
596 unsigned long block;
597
598 if (!tlb_start_valid) {
599 tlb_start = start;
600 tlb_start_valid = 1;
601 }
602
603 if (is_vm_hugetlb_page(vma)) {
604 block = end - start;
605 unmap_hugepage_range(vma, start, end);
606 } else {
607 block = min(zap_bytes, end - start);
608 unmap_page_range(*tlbp, vma, start,
609 start + block, details);
610 }
611
612 start += block;
613 zap_bytes -= block;
614 if (!atomic && need_resched()) {
615 int fullmm = tlb_is_full_mm(*tlbp);
616 tlb_finish_mmu(*tlbp, tlb_start, start);
617 cond_resched_lock(&mm->page_table_lock);
618 *tlbp = tlb_gather_mmu(mm, fullmm);
619 tlb_start_valid = 0;
620 }
621 if ((long)zap_bytes > 0)
622 continue;
623 zap_bytes = ZAP_BLOCK_SIZE;
624 }
625 }
626 return ret;
627}
628
629
630
631
632
633
634
635
636void zap_page_range(struct vm_area_struct *vma, unsigned long address,
637 unsigned long size, struct zap_details *details)
638{
639 struct mm_struct *mm = vma->vm_mm;
640 struct mmu_gather *tlb;
641 unsigned long end = address + size;
642 unsigned long nr_accounted = 0;
643
644 if (is_vm_hugetlb_page(vma)) {
645 zap_hugepage_range(vma, address, size);
646 return;
647 }
648
649 lru_add_drain();
650 spin_lock(&mm->page_table_lock);
651 tlb = tlb_gather_mmu(mm, 0);
652 unmap_vmas(&tlb, mm, vma, address, end, &nr_accounted, details);
653 tlb_finish_mmu(tlb, address, end);
654 spin_unlock(&mm->page_table_lock);
655}
656EXPORT_SYMBOL(zap_page_range);
657
658
659
660
661
662struct page *
663follow_page(struct mm_struct *mm, unsigned long address, int write)
664{
665 pgd_t *pgd;
666 pmd_t *pmd;
667 pte_t *ptep, pte;
668 unsigned long pfn;
669 struct page *page;
670
671 page = follow_huge_addr(mm, address, write);
672 if (! IS_ERR(page))
673 return page;
674
675 pgd = pgd_offset(mm, address);
676 if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
677 goto out;
678
679 pmd = pmd_offset(pgd, address);
680 if (pmd_none(*pmd))
681 goto out;
682 if (pmd_huge(*pmd))
683 return follow_huge_pmd(mm, address, pmd, write);
684 if (unlikely(pmd_bad(*pmd)))
685 goto out;
686
687 ptep = pte_offset_map(pmd, address);
688 if (!ptep)
689 goto out;
690
691 pte = *ptep;
692 pte_unmap(ptep);
693 if (pte_present(pte)) {
694 if (write && !pte_write(pte))
695 goto out;
696 pfn = pte_pfn(pte);
697 if (pfn_valid(pfn)) {
698 page = pfn_to_page(pfn);
699 if (write && !pte_dirty(pte) && !PageDirty(page))
700 set_page_dirty(page);
701 mark_page_accessed(page);
702 return page;
703 }
704 }
705
706out:
707 return NULL;
708}
709
710struct page *
711follow_page_pte(struct mm_struct *mm, unsigned long address, int write,
712 pte_t *page_pte)
713{
714 pgd_t *pgd;
715 pmd_t *pmd;
716 pte_t *ptep, pte;
717 unsigned long pfn;
718 struct page *page;
719
720
721 memset(page_pte, 0, sizeof(*page_pte));
722 page = follow_huge_addr(mm, address, write);
723 if (!IS_ERR(page))
724 return page;
725
726 pgd = pgd_offset(mm, address);
727 if (pgd_none(*pgd) || pgd_bad(*pgd))
728 goto out;
729
730 pmd = pmd_offset(pgd, address);
731 if (pmd_none(*pmd))
732 goto out;
733 if (pmd_huge(*pmd))
734 return follow_huge_pmd(mm, address, pmd, write);
735 if (pmd_bad(*pmd))
736 goto out;
737
738 ptep = pte_offset_map(pmd, address);
739 if (!ptep)
740 goto out;
741
742 pte = *ptep;
743 pte_unmap(ptep);
744 if (pte_present(pte) && pte_read(pte)) {
745 if (write && !pte_write(pte))
746 goto out;
747 if (write && !pte_dirty(pte)) {
748 struct page *page = pte_page(pte);
749 if (!PageDirty(page))
750 set_page_dirty(page);
751 }
752 pfn = pte_pfn(pte);
753 if (pfn_valid(pfn)) {
754 struct page *page = pfn_to_page(pfn);
755
756 mark_page_accessed(page);
757 return page;
758 } else {
759 *page_pte = pte;
760 return NULL;
761 }
762 }
763
764out:
765 return NULL;
766}
767
768
769
770
771
772
773
774
775static inline struct page *get_page_map(struct page *page)
776{
777 if (!pfn_valid(page_to_pfn(page)))
778 return NULL;
779 return page;
780}
781
782
783#ifndef CONFIG_X86_4G
784static inline int
785untouched_anonymous_page(struct mm_struct* mm, struct vm_area_struct *vma,
786 unsigned long address)
787{
788 pgd_t *pgd;
789 pmd_t *pmd;
790
791
792 if (vma->vm_ops && vma->vm_ops->nopage)
793 return 0;
794
795
796 pgd = pgd_offset(mm, address);
797 if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
798 return 1;
799
800
801 pmd = pmd_offset(pgd, address);
802 if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
803 return 1;
804
805
806 return 0;
807}
808#endif
809
810
811int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
812 unsigned long start, int len, int write, int force,
813 struct page **pages, struct vm_area_struct **vmas)
814{
815 int i;
816 unsigned int flags;
817
818
819
820
821
822 flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
823 flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
824 i = 0;
825
826 do {
827 struct vm_area_struct * vma;
828
829 vma = find_extend_vma(mm, start);
830 if (!vma && in_gate_area(tsk, start)) {
831 unsigned long pg = start & PAGE_MASK;
832 struct vm_area_struct *gate_vma = get_gate_vma(tsk);
833 pgd_t *pgd;
834 pmd_t *pmd;
835 pte_t *pte;
836 if (write)
837 return i ? : -EFAULT;
838 if (pg > TASK_SIZE)
839 pgd = pgd_offset_k(pg);
840 else
841 pgd = pgd_offset_gate(mm, pg);
842 BUG_ON(pgd_none(*pgd));
843 pmd = pmd_offset(pgd, pg);
844 BUG_ON(pmd_none(*pmd));
845 pte = pte_offset_map(pmd, pg);
846 BUG_ON(pte_none(*pte));
847 if (pages) {
848 pages[i] = pte_page(*pte);
849 get_page(pages[i]);
850 }
851 pte_unmap(pte);
852 if (vmas)
853 vmas[i] = gate_vma;
854 i++;
855 start += PAGE_SIZE;
856 len--;
857 continue;
858 }
859
860#ifdef CONFIG_XEN
861 if (vma && (vma->vm_flags & VM_FOREIGN)) {
862 struct page **map = vma->vm_private_data;
863 int offset = (start - vma->vm_start) >> PAGE_SHIFT;
864
865 if (map[offset] != NULL) {
866 if (pages)
867 pages[i] = map[offset];
868 if (vmas)
869 vmas[i] = vma;
870 i++;
871 start += PAGE_SIZE;
872 len--;
873 continue;
874 }
875 }
876#endif
877
878 if (!vma || (vma->vm_flags & VM_IO)
879 || !(flags & vma->vm_flags))
880 return i ? : -EFAULT;
881
882 if (is_vm_hugetlb_page(vma)) {
883 i = follow_hugetlb_page(mm, vma, pages, vmas,
884 &start, &len, i);
885 continue;
886 }
887 spin_lock(&mm->page_table_lock);
888 do {
889 struct page *map;
890 int lookup_write = write;
891 while (!(map = follow_page(mm, start, lookup_write))) {
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906#ifndef CONFIG_X86_4G
907 if (!lookup_write &&
908 untouched_anonymous_page(mm,vma,start)) {
909 map = ZERO_PAGE(start);
910 break;
911 }
912#endif
913 spin_unlock(&mm->page_table_lock);
914 switch (handle_mm_fault(mm,vma,start,write)) {
915 case VM_FAULT_MINOR:
916 tsk->min_flt++;
917 break;
918 case VM_FAULT_MAJOR:
919 tsk->maj_flt++;
920 break;
921 case VM_FAULT_SIGBUS:
922 return i ? i : -EFAULT;
923 case VM_FAULT_OOM:
924 return i ? i : -ENOMEM;
925 default:
926 BUG();
927 }
928
929
930
931
932
933
934
935 lookup_write = write && !force;
936 spin_lock(&mm->page_table_lock);
937 }
938 if (pages) {
939 pages[i] = get_page_map(map);
940 if (!pages[i]) {
941 spin_unlock(&mm->page_table_lock);
942 while (i--)
943 page_cache_release(pages[i]);
944 i = -EFAULT;
945 goto out;
946 }
947 flush_dcache_page(pages[i]);
948 if (!PageReserved(pages[i]))
949 page_cache_get(pages[i]);
950 }
951 if (vmas)
952 vmas[i] = vma;
953 i++;
954 start += PAGE_SIZE;
955 len--;
956 } while(len && start < vma->vm_end);
957 spin_unlock(&mm->page_table_lock);
958 } while(len);
959out:
960 return i;
961}
962
963EXPORT_SYMBOL(get_user_pages);
964
965static void zeromap_pte_range(pte_t * pte, unsigned long address,
966 unsigned long size, pgprot_t prot)
967{
968 unsigned long end;
969
970 address &= ~PMD_MASK;
971 end = address + size;
972 if (end > PMD_SIZE)
973 end = PMD_SIZE;
974 do {
975 pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(address), prot));
976 BUG_ON(!pte_none(*pte));
977 set_pte(pte, zero_pte);
978 address += PAGE_SIZE;
979 pte++;
980 } while (address && (address < end));
981}
982
983static inline int zeromap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address,
984 unsigned long size, pgprot_t prot)
985{
986 unsigned long base, end;
987
988 base = address & PGDIR_MASK;
989 address &= ~PGDIR_MASK;
990 end = address + size;
991 if (end > PGDIR_SIZE)
992 end = PGDIR_SIZE;
993 do {
994 pte_t * pte = pte_alloc_map(mm, pmd, base + address);
995 if (!pte)
996 return -ENOMEM;
997 zeromap_pte_range(pte, base + address, end - address, prot);
998 pte_unmap(pte);
999 address = (address + PMD_SIZE) & PMD_MASK;
1000 pmd++;
1001 } while (address && (address < end));
1002 return 0;
1003}
1004
1005int zeromap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned long size, pgprot_t prot)
1006{
1007 int error = 0;
1008 pgd_t * dir;
1009 unsigned long beg = address;
1010 unsigned long end = address + size;
1011 struct mm_struct *mm = vma->vm_mm;
1012
1013 dir = pgd_offset(mm, address);
1014 flush_cache_range(vma, beg, end);
1015 if (address >= end)
1016 BUG();
1017
1018 spin_lock(&mm->page_table_lock);
1019 do {
1020 pmd_t *pmd = pmd_alloc(mm, dir, address);
1021 error = -ENOMEM;
1022 if (!pmd)
1023 break;
1024 error = zeromap_pmd_range(mm, pmd, address, end - address, prot);
1025 if (error)
1026 break;
1027 address = (address + PGDIR_SIZE) & PGDIR_MASK;
1028 dir++;
1029 } while (address && (address < end));
1030
1031
1032
1033 flush_tlb_range(vma, beg, end);
1034 spin_unlock(&mm->page_table_lock);
1035 return error;
1036}
1037
1038
1039
1040
1041
1042
1043static inline void remap_pte_range(pte_t * pte, unsigned long address, unsigned long size,
1044 unsigned long phys_addr, pgprot_t prot)
1045{
1046 unsigned long end;
1047 unsigned long pfn;
1048
1049 address &= ~PMD_MASK;
1050 end = address + size;
1051 if (end > PMD_SIZE)
1052 end = PMD_SIZE;
1053 pfn = phys_addr >> PAGE_SHIFT;
1054 do {
1055 BUG_ON(!pte_none(*pte));
1056 if (!pfn_valid(pfn) || PageReserved(pfn_to_page(pfn)))
1057 set_pte(pte, pfn_pte(pfn, prot));
1058 address += PAGE_SIZE;
1059 pfn++;
1060 pte++;
1061 } while (address && (address < end));
1062}
1063
1064static inline int remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address, unsigned long size,
1065 unsigned long phys_addr, pgprot_t prot)
1066{
1067 unsigned long base, end;
1068
1069 base = address & PGDIR_MASK;
1070 address &= ~PGDIR_MASK;
1071 end = address + size;
1072 if (end > PGDIR_SIZE)
1073 end = PGDIR_SIZE;
1074 phys_addr -= address;
1075 do {
1076 pte_t * pte = pte_alloc_map(mm, pmd, base + address);
1077 if (!pte)
1078 return -ENOMEM;
1079 remap_pte_range(pte, base + address, end - address, address + phys_addr, prot);
1080 pte_unmap(pte);
1081 address = (address + PMD_SIZE) & PMD_MASK;
1082 pmd++;
1083 } while (address && (address < end));
1084 return 0;
1085}
1086
1087
1088int remap_page_range(struct vm_area_struct *vma, unsigned long from, unsigned long phys_addr, unsigned long size, pgprot_t prot)
1089{
1090 int error = 0;
1091 pgd_t * dir;
1092 unsigned long beg = from;
1093 unsigned long end = from + size;
1094 struct mm_struct *mm = vma->vm_mm;
1095
1096 phys_addr -= from;
1097 dir = pgd_offset(mm, from);
1098 flush_cache_range(vma, beg, end);
1099 if (from >= end)
1100 BUG();
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110 vma->vm_flags |= VM_IO | VM_RESERVED;
1111
1112 spin_lock(&mm->page_table_lock);
1113 do {
1114 pmd_t *pmd = pmd_alloc(mm, dir, from);
1115 error = -ENOMEM;
1116 if (!pmd)
1117 break;
1118 error = remap_pmd_range(mm, pmd, from, end - from, phys_addr + from, prot);
1119 if (error)
1120 break;
1121 from = (from + PGDIR_SIZE) & PGDIR_MASK;
1122 dir++;
1123 } while (from && (from < end));
1124
1125
1126
1127 flush_tlb_range(vma, beg, end);
1128 spin_unlock(&mm->page_table_lock);
1129 return error;
1130}
1131
1132EXPORT_SYMBOL(remap_page_range);
1133
1134
1135
1136
1137
1138
1139
1140static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
1141{
1142 if (likely(vma->vm_flags & VM_WRITE))
1143 pte = pte_mkwrite(pte);
1144 return pte;
1145}
1146
1147
1148
1149
1150static inline void break_cow(struct vm_area_struct * vma, struct page * new_page, unsigned long address,
1151 pte_t *page_table)
1152{
1153 pte_t entry;
1154
1155 flush_cache_page(vma, address);
1156 entry = maybe_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot)),
1157 vma);
1158 lazy_mmu_prot_update(entry);
1159 ptep_establish(vma, address, page_table, entry);
1160 update_mmu_cache(vma, address, entry);
1161}
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
1184 unsigned long address, pte_t *page_table, pmd_t *pmd, pte_t pte)
1185{
1186 struct page *old_page, *new_page;
1187 unsigned long pfn = pte_pfn(pte);
1188 pte_t entry;
1189
1190 if (unlikely(!pfn_valid(pfn))) {
1191
1192
1193
1194
1195
1196 pte_unmap(page_table);
1197 printk(KERN_ERR "do_wp_page: bogus page at address %08lx\n",
1198 address);
1199 spin_unlock(&mm->page_table_lock);
1200 return VM_FAULT_OOM;
1201 }
1202 old_page = pfn_to_page(pfn);
1203
1204 if (!TestSetPageLocked(old_page)) {
1205 int reuse = can_share_swap_page(old_page);
1206 unlock_page(old_page);
1207 if (reuse) {
1208 flush_cache_page(vma, address);
1209 entry = maybe_mkwrite(pte_mkyoung(pte_mkdirty(pte)),
1210 vma);
1211 ptep_set_access_flags(vma, address, page_table, entry, 1);
1212 update_mmu_cache(vma, address, entry);
1213 lazy_mmu_prot_update(entry);
1214 pte_unmap(page_table);
1215 spin_unlock(&mm->page_table_lock);
1216 return VM_FAULT_MINOR;
1217 }
1218 }
1219 pte_unmap(page_table);
1220
1221
1222
1223
1224 if (!PageReserved(old_page))
1225 page_cache_get(old_page);
1226 spin_unlock(&mm->page_table_lock);
1227
1228 if (unlikely(anon_vma_prepare(vma)))
1229 goto no_new_page;
1230 new_page = alloc_page_vma(GFP_HIGHUSER, vma, address);
1231 if (!new_page)
1232 goto no_new_page;
1233 copy_cow_page(old_page,new_page,address);
1234
1235
1236
1237
1238 spin_lock(&mm->page_table_lock);
1239 page_table = pte_offset_map(pmd, address);
1240 if (likely(pte_same(*page_table, pte))) {
1241 if (PageAnon(old_page))
1242 mm->anon_rss--;
1243 if (PageReserved(old_page))
1244 ++mm->rss;
1245 else
1246 page_remove_rmap(old_page);
1247 break_cow(vma, new_page, address, page_table);
1248 lru_cache_add_active(new_page);
1249 page_add_anon_rmap(new_page, vma, address);
1250
1251
1252 new_page = old_page;
1253 }
1254 pte_unmap(page_table);
1255 page_cache_release(new_page);
1256 page_cache_release(old_page);
1257 spin_unlock(&mm->page_table_lock);
1258 return VM_FAULT_MINOR;
1259
1260no_new_page:
1261 page_cache_release(old_page);
1262 return VM_FAULT_OOM;
1263}
1264
1265
1266
1267
1268static inline void unmap_mapping_range_list(struct prio_tree_root *root,
1269 struct zap_details *details)
1270{
1271 struct vm_area_struct *vma;
1272 struct prio_tree_iter iter;
1273 pgoff_t vba, vea, zba, zea;
1274
1275 vma_prio_tree_foreach(vma, &iter, root,
1276 details->first_index, details->last_index) {
1277 vba = vma->vm_pgoff;
1278 vea = vba + ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) - 1;
1279
1280 zba = details->first_index;
1281 if (zba < vba)
1282 zba = vba;
1283 zea = details->last_index;
1284 if (zea > vea)
1285 zea = vea;
1286 zap_page_range(vma,
1287 ((zba - vba) << PAGE_SHIFT) + vma->vm_start,
1288 (zea - zba + 1) << PAGE_SHIFT, details);
1289 }
1290}
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308void unmap_mapping_range(struct address_space *mapping,
1309 loff_t const holebegin, loff_t const holelen, int even_cows)
1310{
1311 struct zap_details details;
1312 pgoff_t hba = holebegin >> PAGE_SHIFT;
1313 pgoff_t hlen = (holelen + PAGE_SIZE - 1) >> PAGE_SHIFT;
1314
1315
1316 if (sizeof(holelen) > sizeof(hlen)) {
1317 long long holeend =
1318 (holebegin + holelen + PAGE_SIZE - 1) >> PAGE_SHIFT;
1319 if (holeend & ~(long long)ULONG_MAX)
1320 hlen = ULONG_MAX - hba + 1;
1321 }
1322
1323 details.check_mapping = even_cows? NULL: mapping;
1324 details.nonlinear_vma = NULL;
1325 details.first_index = hba;
1326 details.last_index = hba + hlen - 1;
1327 details.atomic = 1;
1328 if (details.last_index < details.first_index)
1329 details.last_index = ULONG_MAX;
1330
1331 spin_lock(&mapping->i_mmap_lock);
1332
1333 atomic_inc(&mapping->truncate_count);
1334
1335 if (unlikely(!prio_tree_empty(&mapping->i_mmap)))
1336 unmap_mapping_range_list(&mapping->i_mmap, &details);
1337
1338
1339
1340
1341
1342
1343
1344 if (unlikely(!list_empty(&mapping->i_mmap_nonlinear))) {
1345 struct vm_area_struct *vma;
1346 list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
1347 shared.vm_set.list) {
1348 details.nonlinear_vma = vma;
1349 zap_page_range(vma, vma->vm_start,
1350 vma->vm_end - vma->vm_start, &details);
1351 }
1352 }
1353 spin_unlock(&mapping->i_mmap_lock);
1354}
1355EXPORT_SYMBOL(unmap_mapping_range);
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365int vmtruncate(struct inode * inode, loff_t offset)
1366{
1367 struct address_space *mapping = inode->i_mapping;
1368 unsigned long limit;
1369
1370 if (inode->i_size < offset)
1371 goto do_expand;
1372
1373
1374
1375
1376 if (IS_SWAPFILE(inode))
1377 goto out_busy;
1378 i_size_write(inode, offset);
1379 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
1380 truncate_inode_pages(mapping, offset);
1381 goto out_truncate;
1382
1383do_expand:
1384 limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
1385 if (limit != RLIM_INFINITY && offset > limit)
1386 goto out_sig;
1387 if (offset > inode->i_sb->s_maxbytes)
1388 goto out_big;
1389 i_size_write(inode, offset);
1390
1391out_truncate:
1392 if (inode->i_op && inode->i_op->truncate)
1393 inode->i_op->truncate(inode);
1394 return 0;
1395out_sig:
1396 send_sig(SIGXFSZ, current, 0);
1397out_big:
1398 return -EFBIG;
1399out_busy:
1400 return -ETXTBSY;
1401}
1402
1403EXPORT_SYMBOL(vmtruncate);
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416void swapin_readahead(swp_entry_t entry, unsigned long addr,struct vm_area_struct *vma)
1417{
1418#ifdef CONFIG_NUMA
1419 struct vm_area_struct *next_vma = vma ? vma->vm_next : NULL;
1420#endif
1421 int i, num;
1422 struct page *new_page;
1423 unsigned long offset;
1424
1425
1426
1427
1428 num = valid_swaphandles(entry, &offset);
1429 for (i = 0; i < num; offset++, i++) {
1430
1431 new_page = read_swap_cache_async(swp_entry(swp_type(entry),
1432 offset), vma, addr);
1433 if (!new_page)
1434 break;
1435 page_cache_release(new_page);
1436#ifdef CONFIG_NUMA
1437
1438
1439
1440 addr += PAGE_SIZE;
1441 if (addr == 0)
1442 vma = NULL;
1443 if (vma) {
1444 if (addr >= vma->vm_end) {
1445 vma = next_vma;
1446 next_vma = vma ? vma->vm_next : NULL;
1447 }
1448 if (vma && addr < vma->vm_start)
1449 vma = NULL;
1450 } else {
1451 if (next_vma && addr >= next_vma->vm_start) {
1452 vma = next_vma;
1453 next_vma = vma->vm_next;
1454 }
1455 }
1456#endif
1457 }
1458 lru_add_drain();
1459}
1460
1461#ifdef CONFIG_XEN
/*
 * Fallback range-stepping helpers, defined only if not already provided.
 * Each yields the next pgd / pmd boundary after addr, or end if that
 * comes first. Subtracting 1 from both sides before comparing keeps a
 * boundary that wrapped to 0 ordered after every non-zero end.
 */
#ifndef pgd_addr_end
#define pgd_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;	\
	((__boundary - 1) < ((end) - 1)) ? __boundary : (end);		\
})
#endif

#ifndef pmd_addr_end
#define pmd_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK;	\
	((__boundary - 1) < ((end) - 1)) ? __boundary : (end);		\
})
#endif
1475
1476
1477static inline int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
1478 unsigned long addr, unsigned long end,
1479 pte_fn_t fn, void *data)
1480{
1481 pte_t *pte;
1482 int err;
1483 struct page *pmd_page;
1484
1485 if (mm == &init_mm)
1486 pte = pte_alloc_kernel(mm, pmd, addr);
1487 else
1488 pte = pte_alloc_map(mm, pmd, addr);
1489
1490 if (!pte)
1491 return -ENOMEM;
1492
1493 BUG_ON(pmd_huge(*pmd));
1494
1495 pmd_page = pmd_page(*pmd);
1496
1497 do {
1498 err = fn(pte, pmd_page, addr, data);
1499 if (err)
1500 break;
1501 } while (pte++, addr += PAGE_SIZE, addr != end);
1502
1503 if (mm != &init_mm)
1504 pte_unmap(pte);
1505 return err;
1506}
1507
1508static inline int apply_to_pmd_range(struct mm_struct *mm, pgd_t *pgd,
1509 unsigned long addr, unsigned long end,
1510 pte_fn_t fn, void *data)
1511{
1512 pmd_t *pmd;
1513 unsigned long next;
1514 int err;
1515
1516 pmd = pmd_alloc(mm, pgd, addr);
1517 if (!pmd)
1518 return -ENOMEM;
1519 do {
1520 next = pmd_addr_end(addr, end);
1521 err = apply_to_pte_range(mm, pmd, addr, next, fn, data);
1522 if (err)
1523 break;
1524 } while (pmd++, addr = next, addr != end);
1525 return err;
1526}
1527
1528
1529
1530
1531
1532int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
1533 unsigned long size, pte_fn_t fn, void *data)
1534{
1535 pgd_t *pgd;
1536 unsigned long next;
1537 unsigned long end = addr + size;
1538 int err;
1539
1540 BUG_ON(addr >= end);
1541#ifdef __x86_64__
1542 if (mm == &init_mm)
1543 pgd = pgd_offset_k(addr);
1544 else
1545#endif
1546 pgd = pgd_offset(mm, addr);
1547 spin_lock(&mm->page_table_lock);
1548 do {
1549 next = pgd_addr_end(addr, end);
1550 err = apply_to_pmd_range(mm, pgd, addr, next, fn, data);
1551 if (err)
1552 break;
1553 } while (pgd++, addr = next, addr != end);
1554 spin_unlock(&mm->page_table_lock);
1555 return err;
1556}
1557EXPORT_SYMBOL_GPL(apply_to_page_range);
1558#undef pgd_addr_end
1559#undef pmd_addr_end
1560#endif
1561
1562
1563
1564
1565
1566static int do_swap_page(struct mm_struct * mm,
1567 struct vm_area_struct * vma, unsigned long address,
1568 pte_t *page_table, pmd_t *pmd, pte_t orig_pte, int write_access)
1569{
1570 struct page *page;
1571 swp_entry_t entry = pte_to_swp_entry(orig_pte);
1572 pte_t pte;
1573 int ret = VM_FAULT_MINOR;
1574
1575 pte_unmap(page_table);
1576 spin_unlock(&mm->page_table_lock);
1577 page = lookup_swap_cache(entry);
1578 if (!page) {
1579 swapin_readahead(entry, address, vma);
1580 page = read_swap_cache_async(entry, vma, address);
1581 if (!page) {
1582
1583
1584
1585
1586 spin_lock(&mm->page_table_lock);
1587 page_table = pte_offset_map(pmd, address);
1588 if (likely(pte_same(*page_table, orig_pte)))
1589 ret = VM_FAULT_OOM;
1590 else
1591 ret = VM_FAULT_MINOR;
1592 pte_unmap(page_table);
1593 spin_unlock(&mm->page_table_lock);
1594 goto out;
1595 }
1596
1597
1598 ret = VM_FAULT_MAJOR;
1599 inc_page_state(pgmajfault);
1600 grab_swap_token();
1601 }
1602
1603 mark_page_accessed(page);
1604 lock_page(page);
1605
1606
1607
1608
1609
1610 spin_lock(&mm->page_table_lock);
1611 page_table = pte_offset_map(pmd, address);
1612 if (unlikely(!pte_same(*page_table, orig_pte))) {
1613 ret = VM_FAULT_MINOR;
1614 goto out_nomap;
1615 }
1616
1617 if (unlikely(!PageUptodate(page))) {
1618 ret = VM_FAULT_SIGBUS;
1619 goto out_nomap;
1620 }
1621
1622
1623
1624 swap_free(entry);
1625 if (vm_swap_full())
1626 remove_exclusive_swap_page(page);
1627
1628 mm->rss++;
1629 pte = mk_pte(page, vma->vm_page_prot);
1630 if (write_access && can_share_swap_page(page)) {
1631 pte = maybe_mkwrite(pte_mkdirty(pte), vma);
1632 write_access = 0;
1633 }
1634
1635 flush_icache_page(vma, page);
1636 set_pte(page_table, pte);
1637 page_add_anon_rmap(page, vma, address);
1638 unlock_page(page);
1639
1640 if (write_access) {
1641 if (do_wp_page(mm, vma, address,
1642 page_table, pmd, pte) == VM_FAULT_OOM)
1643 ret = VM_FAULT_OOM;
1644 goto out;
1645 }
1646
1647
1648 update_mmu_cache(vma, address, pte);
1649 lazy_mmu_prot_update(pte);
1650 pte_unmap(page_table);
1651 spin_unlock(&mm->page_table_lock);
1652out:
1653 return ret;
1654out_nomap:
1655 pte_unmap(page_table);
1656 spin_unlock(&mm->page_table_lock);
1657 unlock_page(page);
1658 page_cache_release(page);
1659 goto out;
1660
1661}
1662
1663
1664
1665
1666
1667
1668static int
1669do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
1670 pte_t *page_table, pmd_t *pmd, int write_access,
1671 unsigned long addr)
1672{
1673 pte_t entry;
1674 struct page * page = ZERO_PAGE(addr);
1675
1676
1677 entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot));
1678
1679
1680 if (write_access) {
1681
1682 pte_unmap(page_table);
1683 spin_unlock(&mm->page_table_lock);
1684
1685 if (unlikely(anon_vma_prepare(vma)))
1686 goto no_mem;
1687 page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
1688 if (!page)
1689 goto no_mem;
1690 clear_user_highpage(page, addr);
1691
1692 spin_lock(&mm->page_table_lock);
1693 page_table = pte_offset_map(pmd, addr);
1694
1695 if (!pte_none(*page_table)) {
1696 pte_unmap(page_table);
1697 page_cache_release(page);
1698 spin_unlock(&mm->page_table_lock);
1699 goto out;
1700 }
1701 mm->rss++;
1702 entry = maybe_mkwrite(pte_mkdirty(mk_pte(page,
1703 vma->vm_page_prot)),
1704 vma);
1705 lru_cache_add_active(page);
1706 mark_page_accessed(page);
1707 page_add_anon_rmap(page, vma, addr);
1708 }
1709
1710 set_pte(page_table, entry);
1711 pte_unmap(page_table);
1712
1713
1714 update_mmu_cache(vma, addr, entry);
1715 lazy_mmu_prot_update(entry);
1716 spin_unlock(&mm->page_table_lock);
1717out:
1718 return VM_FAULT_MINOR;
1719no_mem:
1720 return VM_FAULT_OOM;
1721}
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735static int
1736do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
1737 unsigned long address, int write_access, pte_t *page_table, pmd_t *pmd)
1738{
1739 struct page * new_page;
1740 struct address_space *mapping = NULL;
1741 pte_t entry;
1742 int sequence = 0;
1743 int ret = VM_FAULT_MINOR;
1744 int anon = 0;
1745
1746 if (!vma->vm_ops || !vma->vm_ops->nopage)
1747 return do_anonymous_page(mm, vma, page_table,
1748 pmd, write_access, address);
1749 pte_unmap(page_table);
1750 spin_unlock(&mm->page_table_lock);
1751
1752 if (vma->vm_file) {
1753 mapping = vma->vm_file->f_mapping;
1754 sequence = atomic_read(&mapping->truncate_count);
1755 }
1756 smp_rmb();
1757retry:
1758 new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
1759
1760
1761 if (new_page == NOPAGE_SIGBUS)
1762 return VM_FAULT_SIGBUS;
1763 if (new_page == NOPAGE_OOM)
1764 return VM_FAULT_OOM;
1765
1766
1767
1768
1769 if (write_access && !(vma->vm_flags & VM_SHARED)) {
1770 struct page *page;
1771
1772 if (unlikely(anon_vma_prepare(vma)))
1773 goto oom;
1774 page = alloc_page_vma(GFP_HIGHUSER, vma, address);
1775 if (!page)
1776 goto oom;
1777 copy_user_highpage(page, new_page, address);
1778 page_cache_release(new_page);
1779 new_page = page;
1780 anon = 1;
1781 }
1782
1783 lock_page(new_page);
1784
1785 spin_lock(&mm->page_table_lock);
1786
1787
1788
1789
1790
1791 if (mapping &&
1792 (unlikely(sequence != atomic_read(&mapping->truncate_count)))) {
1793 spin_unlock(&mm->page_table_lock);
1794 unlock_page(new_page);
1795 page_cache_release(new_page);
1796 sequence = atomic_read(&mapping->truncate_count);
1797 goto retry;
1798 }
1799 page_table = pte_offset_map(pmd, address);
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812 if (pte_none(*page_table)) {
1813 if (!PageReserved(new_page))
1814 ++mm->rss;
1815 flush_icache_page(vma, new_page);
1816 entry = mk_pte(new_page, vma->vm_page_prot);
1817 if (write_access)
1818 entry = maybe_mkwrite(pte_mkdirty(entry), vma);
1819 lazy_mmu_prot_update(entry);
1820 set_pte(page_table, entry);
1821 if (anon) {
1822 lru_cache_add_active(new_page);
1823 page_add_anon_rmap(new_page, vma, address);
1824 } else
1825 page_add_file_rmap(new_page);
1826 pte_unmap(page_table);
1827 unlock_page(new_page);
1828 } else {
1829
1830 pte_unmap(page_table);
1831 unlock_page(new_page);
1832 page_cache_release(new_page);
1833 spin_unlock(&mm->page_table_lock);
1834 goto out;
1835 }
1836
1837
1838 update_mmu_cache(vma, address, entry);
1839 spin_unlock(&mm->page_table_lock);
1840out:
1841 return ret;
1842oom:
1843 page_cache_release(new_page);
1844 ret = VM_FAULT_OOM;
1845 goto out;
1846}
1847
1848
1849
1850
1851
1852
1853static int do_file_page(struct mm_struct * mm, struct vm_area_struct * vma,
1854 unsigned long address, int write_access, pte_t *pte, pmd_t *pmd)
1855{
1856 unsigned long pgoff;
1857 int err;
1858
1859 BUG_ON(!vma->vm_ops || !vma->vm_ops->nopage);
1860
1861
1862
1863
1864 if (!vma->vm_ops || !vma->vm_ops->populate ||
1865 (write_access && !(vma->vm_flags & VM_SHARED))) {
1866 pte_clear(pte);
1867 return do_no_page(mm, vma, address, write_access, pte, pmd);
1868 }
1869
1870 pgoff = pte_to_pgoff(*pte);
1871
1872 pte_unmap(pte);
1873 spin_unlock(&mm->page_table_lock);
1874
1875 err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE, vma->vm_page_prot, pgoff, 0);
1876 if (err == -ENOMEM)
1877 return VM_FAULT_OOM;
1878 if (err)
1879 return VM_FAULT_SIGBUS;
1880 return VM_FAULT_MAJOR;
1881}
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904static inline int handle_pte_fault(struct mm_struct *mm,
1905 struct vm_area_struct * vma, unsigned long address,
1906 int write_access, pte_t *pte, pmd_t *pmd)
1907{
1908 pte_t entry;
1909
1910 entry = *pte;
1911 if (!pte_present(entry)) {
1912
1913
1914
1915
1916
1917 if (pte_none(entry))
1918 return do_no_page(mm, vma, address, write_access, pte, pmd);
1919 if (pte_file(entry))
1920 return do_file_page(mm, vma, address, write_access, pte, pmd);
1921 return do_swap_page(mm, vma, address, pte, pmd, entry, write_access);
1922 }
1923
1924 if (write_access) {
1925 if (!pte_write(entry))
1926 return do_wp_page(mm, vma, address, pte, pmd, entry);
1927
1928 entry = pte_mkdirty(entry);
1929 }
1930 entry = pte_mkyoung(entry);
1931 ptep_set_access_flags(vma, address, pte, entry, write_access);
1932 update_mmu_cache(vma, address, entry);
1933 lazy_mmu_prot_update(entry);
1934 pte_unmap(pte);
1935 spin_unlock(&mm->page_table_lock);
1936 return VM_FAULT_MINOR;
1937}
1938
1939
1940
1941
1942int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
1943 unsigned long address, int write_access)
1944{
1945 pgd_t *pgd;
1946 pmd_t *pmd;
1947
1948 __set_current_state(TASK_RUNNING);
1949 pgd = pgd_offset(mm, address);
1950
1951 inc_page_state(pgfault);
1952
1953 if (is_vm_hugetlb_page(vma))
1954 return hugetlb_fault(mm, vma, address, write_access);
1955
1956
1957
1958
1959
1960 spin_lock(&mm->page_table_lock);
1961 pmd = pmd_alloc(mm, pgd, address);
1962
1963 if (pmd) {
1964 pte_t * pte = pte_alloc_map(mm, pmd, address);
1965 if (pte)
1966 return handle_pte_fault(mm, vma, address, write_access, pte, pmd);
1967 }
1968 spin_unlock(&mm->page_table_lock);
1969 return VM_FAULT_OOM;
1970}
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981pmd_t fastcall *__pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
1982{
1983 pmd_t *new;
1984
1985 spin_unlock(&mm->page_table_lock);
1986 new = pmd_alloc_one(mm, address);
1987 spin_lock(&mm->page_table_lock);
1988 if (!new)
1989 return NULL;
1990
1991
1992
1993
1994
1995 if (pgd_present(*pgd)) {
1996 pmd_free(new);
1997 goto out;
1998 }
1999 pgd_populate(mm, pgd, new);
2000out:
2001 return pmd_offset(pgd, address);
2002}
2003
2004int make_pages_present(unsigned long addr, unsigned long end)
2005{
2006 int ret, len, write;
2007 struct vm_area_struct * vma;
2008
2009 vma = find_vma(current->mm, addr);
2010 if (!vma)
2011 return -1;
2012 write = (vma->vm_flags & VM_WRITE) != 0;
2013 if (addr >= end)
2014 BUG();
2015 if (end > vma->vm_end)
2016 BUG();
2017 len = (end+PAGE_SIZE-1)/PAGE_SIZE-addr/PAGE_SIZE;
2018 ret = get_user_pages(current, current->mm, addr,
2019 len, write, 0, NULL, NULL);
2020 if (ret < 0)
2021 return ret;
2022 return ret == len ? 0 : -1;
2023}
2024
2025
2026
2027
2028struct page * vmalloc_to_page(void * vmalloc_addr)
2029{
2030 unsigned long addr = (unsigned long) vmalloc_addr;
2031 struct page *page = NULL;
2032 pgd_t *pgd = pgd_offset_k(addr);
2033 pmd_t *pmd;
2034 pte_t *ptep, pte;
2035
2036 if (!pgd_none(*pgd)) {
2037 pmd = pmd_offset(pgd, addr);
2038 if (!pmd_none(*pmd)) {
2039 preempt_disable();
2040 ptep = pte_offset_map(pmd, addr);
2041 pte = *ptep;
2042 if (pte_present(pte))
2043 page = pte_page(pte);
2044 pte_unmap(ptep);
2045 preempt_enable();
2046 }
2047 }
2048 return page;
2049}
2050
2051EXPORT_SYMBOL(vmalloc_to_page);
2052
2053#if !defined(CONFIG_ARCH_GATE_AREA)
2054
2055#if defined(AT_SYSINFO_EHDR)
2056struct vm_area_struct gate_vma;
2057
2058static int __init gate_vma_init(void)
2059{
2060 gate_vma.vm_mm = NULL;
2061 gate_vma.vm_start = FIXADDR_USER_START;
2062 gate_vma.vm_end = FIXADDR_USER_END;
2063 gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
2064 gate_vma.vm_page_prot = __P101;
2065 return 0;
2066}
2067__initcall(gate_vma_init);
2068#endif
2069
/*
 * Return the gate vma, or NULL when AT_SYSINFO_EHDR is not defined and no
 * gate vma exists.  tsk is not used by this generic version.
 */
struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
	struct vm_area_struct *gate = NULL;

#ifdef AT_SYSINFO_EHDR
	gate = &gate_vma;
#endif
	return gate;
}
2078
/*
 * Return 1 when addr lies in [FIXADDR_USER_START, FIXADDR_USER_END) and
 * AT_SYSINFO_EHDR is defined, 0 otherwise.  task is not used by this
 * generic version.
 */
int in_gate_area(struct task_struct *task, unsigned long addr)
{
	int inside = 0;

#ifdef AT_SYSINFO_EHDR
	if (addr >= FIXADDR_USER_START && addr < FIXADDR_USER_END)
		inside = 1;
#endif
	return inside;
}
2087
2088#endif
2089