1
2
3
4
5
6
7
8
9#include <linux/slab.h>
10#include <linux/shm.h>
11#include <linux/mman.h>
12#include <linux/pagemap.h>
13#include <linux/swap.h>
14#include <linux/syscalls.h>
15#include <linux/init.h>
16#include <linux/file.h>
17#include <linux/fs.h>
18#include <linux/personality.h>
19#include <linux/security.h>
20#include <linux/hugetlb.h>
21#include <linux/profile.h>
22#include <linux/module.h>
23#include <linux/mount.h>
24#include <linux/mempolicy.h>
25#include <linux/rmap.h>
26#include <linux/random.h>
27
28#include <asm/uaccess.h>
29#include <asm/cacheflush.h>
30#include <asm/tlb.h>
31
/*
 * Fallback for architectures that do not provide arch_mmap_check():
 * the check expands to 0, so the "if (error) return error" test that
 * follows its use in do_mmap_pgoff() never triggers.
 */
#ifndef arch_mmap_check
#define arch_mmap_check(addr, len, flags) (0)
#endif
35
36
37
38
39
40#undef DEBUG_MM_RB
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
/*
 * Page-protection lookup table with one entry per combination of the low
 * four vm_flags bits: entries 0-7 hold the __Pxxx values and entries 8-15
 * the __Sxxx values.  do_mmap_pgoff() picks a new vma's vm_page_prot with
 * protection_map[vm_flags & 0x0f].
 */
pgprot_t protection_map[16] = {
	__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
	__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
};

/* Overcommit mode; do_mmap_pgoff() compares it against OVERCOMMIT_NEVER. */
int sysctl_overcommit_memory = OVERCOMMIT_GUESS;
/* NOTE(review): presumably a percentage used by the overcommit code; not read in this part of the file. */
int sysctl_overcommit_ratio = 50;
/* do_mmap_pgoff() fails with -ENOMEM once mm->map_count exceeds this. */
int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
/* NOTE(review): presumably the global committed-memory counter; it is only defined and exported here. */
atomic_t vm_committed_space = ATOMIC_INIT(0);

#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
/* Defined elsewhere; read by the arch_get_unmapped_area*() fallbacks below under CONFIG_X86. */
extern int sysctl_legacy_va_layout;
#endif

EXPORT_SYMBOL(sysctl_overcommit_memory);
EXPORT_SYMBOL(sysctl_overcommit_ratio);
EXPORT_SYMBOL(sysctl_max_map_count);
EXPORT_SYMBOL(vm_committed_space);
75
76
77
78
79static void __remove_shared_vm_struct(struct vm_area_struct *vma,
80 struct file *file, struct address_space *mapping)
81{
82 if (vma->vm_flags & VM_DENYWRITE)
83 atomic_inc(&file->f_dentry->d_inode->i_writecount);
84 if (vma->vm_flags & VM_SHARED)
85 mapping->i_mmap_writable--;
86
87 flush_dcache_mmap_lock(mapping);
88 if (unlikely(vma->vm_flags & VM_NONLINEAR))
89 list_del_init(&vma->shared.vm_set.list);
90 else
91 vma_prio_tree_remove(vma, &mapping->i_mmap);
92 flush_dcache_mmap_unlock(mapping);
93}
94
95
96
97
98static void remove_vm_struct(struct vm_area_struct *vma)
99{
100 struct file *file = vma->vm_file;
101
102 might_sleep();
103 if (file) {
104 struct address_space *mapping = file->f_mapping;
105 spin_lock(&mapping->i_mmap_lock);
106 __remove_shared_vm_struct(vma, file, mapping);
107 spin_unlock(&mapping->i_mmap_lock);
108 }
109 if (vma->vm_ops && vma->vm_ops->close)
110 vma->vm_ops->close(vma);
111 if (file)
112 fput(file);
113 anon_vma_unlink(vma);
114 mpol_free(vma_policy(vma));
115 kmem_cache_free(vm_area_cachep, vma);
116}
117
118
119
120
121
122
123
124
125asmlinkage unsigned long sys_brk(unsigned long brk)
126{
127 unsigned long rlim, retval;
128 unsigned long newbrk, oldbrk;
129 struct mm_struct *mm = current->mm;
130
131 down_write(&mm->mmap_sem);
132
133 if (brk < mm->end_code)
134 goto out;
135 newbrk = PAGE_ALIGN(brk);
136 oldbrk = PAGE_ALIGN(mm->brk);
137 if (oldbrk == newbrk)
138 goto set_brk;
139
140
141 if (brk <= mm->brk) {
142 if (!do_munmap(mm, newbrk, oldbrk-newbrk))
143 goto set_brk;
144 goto out;
145 }
146
147
148 rlim = current->rlim[RLIMIT_DATA].rlim_cur;
149 if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
150 goto out;
151
152
153 if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
154 goto out;
155
156
157 if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
158 goto out;
159set_brk:
160 mm->brk = brk;
161out:
162 retval = mm->brk;
163 up_write(&mm->mmap_sem);
164 return retval;
165}
166
167#ifdef DEBUG_MM_RB
168static int browse_rb(struct rb_root *root)
169{
170 int i = 0, j;
171 struct rb_node *nd, *pn = NULL;
172 unsigned long prev = 0, pend = 0;
173
174 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
175 struct vm_area_struct *vma;
176 vma = rb_entry(nd, struct vm_area_struct, vm_rb);
177 if (vma->vm_start < prev)
178 printk("vm_start %lx prev %lx\n", vma->vm_start, prev), i = -1;
179 if (vma->vm_start < pend)
180 printk("vm_start %lx pend %lx\n", vma->vm_start, pend);
181 if (vma->vm_start > vma->vm_end)
182 printk("vm_end %lx < vm_start %lx\n", vma->vm_end, vma->vm_start);
183 i++;
184 pn = nd;
185 }
186 j = 0;
187 for (nd = pn; nd; nd = rb_prev(nd)) {
188 j++;
189 }
190 if (i != j)
191 printk("backwards %d, forwards %d\n", j, i), i = 0;
192 return i;
193}
194
195void validate_mm(struct mm_struct *mm)
196{
197 int bug = 0;
198 int i = 0;
199 struct vm_area_struct *tmp = mm->mmap;
200 while (tmp) {
201 tmp = tmp->vm_next;
202 i++;
203 }
204 if (i != mm->map_count)
205 printk("map_count %d vm_next %d\n", mm->map_count, i), bug = 1;
206 i = browse_rb(&mm->mm_rb);
207 if (i != mm->map_count)
208 printk("map_count %d rb %d\n", mm->map_count, i), bug = 1;
209 if (bug)
210 BUG();
211}
212#else
213#define validate_mm(mm) do { } while (0)
214#endif
215
216static struct vm_area_struct *
217find_vma_prepare(struct mm_struct *mm, unsigned long addr,
218 struct vm_area_struct **pprev, struct rb_node ***rb_link,
219 struct rb_node ** rb_parent)
220{
221 struct vm_area_struct * vma;
222 struct rb_node ** __rb_link, * __rb_parent, * rb_prev;
223
224 __rb_link = &mm->mm_rb.rb_node;
225 rb_prev = __rb_parent = NULL;
226 vma = NULL;
227
228 while (*__rb_link) {
229 struct vm_area_struct *vma_tmp;
230
231 __rb_parent = *__rb_link;
232 vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);
233
234 if (vma_tmp->vm_end > addr) {
235 vma = vma_tmp;
236 if (vma_tmp->vm_start <= addr)
237 return vma;
238 __rb_link = &__rb_parent->rb_left;
239 } else {
240 rb_prev = __rb_parent;
241 __rb_link = &__rb_parent->rb_right;
242 }
243 }
244
245 *pprev = NULL;
246 if (rb_prev)
247 *pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
248 *rb_link = __rb_link;
249 *rb_parent = __rb_parent;
250 return vma;
251}
252
253static inline void
254__vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
255 struct vm_area_struct *prev, struct rb_node *rb_parent)
256{
257 if (vma->vm_flags & VM_EXEC)
258 arch_add_exec_range(mm, vma->vm_end);
259 if (prev) {
260 vma->vm_next = prev->vm_next;
261 prev->vm_next = vma;
262 } else {
263 mm->mmap = vma;
264 if (rb_parent)
265 vma->vm_next = rb_entry(rb_parent,
266 struct vm_area_struct, vm_rb);
267 else
268 vma->vm_next = NULL;
269 }
270}
271
/*
 * Link @vma's rb node into the mm's rb-tree at the empty slot @rb_link
 * below @rb_parent (as produced by find_vma_prepare()), then call
 * rb_insert_color() on the tree root for the newly linked node.
 */
void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
		struct rb_node **rb_link, struct rb_node *rb_parent)
{
	rb_link_node(&vma->vm_rb, rb_parent, rb_link);
	rb_insert_color(&vma->vm_rb, &mm->mm_rb);
}
278
279static inline void __vma_link_file(struct vm_area_struct *vma)
280{
281 struct file * file;
282
283 file = vma->vm_file;
284 if (file) {
285 struct address_space *mapping = file->f_mapping;
286
287 if (vma->vm_flags & VM_DENYWRITE)
288 atomic_dec(&file->f_dentry->d_inode->i_writecount);
289 if (vma->vm_flags & VM_SHARED)
290 mapping->i_mmap_writable++;
291
292 flush_dcache_mmap_lock(mapping);
293 if (unlikely(vma->vm_flags & VM_NONLINEAR))
294 vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
295 else
296 vma_prio_tree_insert(vma, &mapping->i_mmap);
297 flush_dcache_mmap_unlock(mapping);
298 }
299}
300
/*
 * Link @vma into the three per-mm/per-anon_vma structures: the vm_next
 * list, the rb-tree and the anon_vma list.  No locks are taken and
 * mm->map_count is not touched here; vma_link() and
 * __insert_vm_struct() handle those around the call.
 */
static void
__vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
	struct vm_area_struct *prev, struct rb_node **rb_link,
	struct rb_node *rb_parent)
{
	__vma_link_list(mm, vma, prev, rb_parent);
	__vma_link_rb(mm, vma, rb_link, rb_parent);
	__anon_vma_link(vma);
}
310
311static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
312 struct vm_area_struct *prev, struct rb_node **rb_link,
313 struct rb_node *rb_parent)
314{
315 struct address_space *mapping = NULL;
316
317 if (vma->vm_file)
318 mapping = vma->vm_file->f_mapping;
319
320 if (mapping)
321 spin_lock(&mapping->i_mmap_lock);
322 anon_vma_lock(vma);
323
324 __vma_link(mm, vma, prev, rb_link, rb_parent);
325 __vma_link_file(vma);
326
327 anon_vma_unlock(vma);
328 if (mapping)
329 spin_unlock(&mapping->i_mmap_lock);
330
331 mm->map_count++;
332 validate_mm(mm);
333}
334
335
336
337
338
339
340static void
341__insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
342{
343 struct vm_area_struct * __vma, * prev;
344 struct rb_node ** rb_link, * rb_parent;
345
346 __vma = find_vma_prepare(mm, vma->vm_start,&prev, &rb_link, &rb_parent);
347 if (__vma && __vma->vm_start < vma->vm_end)
348 BUG();
349 __vma_link(mm, vma, prev, rb_link, rb_parent);
350 mm->map_count++;
351}
352
/*
 * Unlink @vma, whose list predecessor is @prev, from the mm's vm_next
 * list and rb-tree.  @prev is dereferenced unconditionally, so it must
 * not be NULL.  If the mm's lookup cache pointed at @vma it is
 * redirected to @prev, and an executable vma is reported to
 * arch_remove_exec_range() once it is off the list.
 */
static inline void
__vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
		struct vm_area_struct *prev)
{
	prev->vm_next = vma->vm_next;
	rb_erase(&vma->vm_rb, &mm->mm_rb);
	/* Never leave mmap_cache pointing at a vma that is going away. */
	if (mm->mmap_cache == vma)
		mm->mmap_cache = prev;
	if (vma->vm_flags & VM_EXEC)
		arch_remove_exec_range(mm, vma->vm_end);
}
364
365
366
367
368
369
370
371
/*
 * vma_adjust - change vm_start, vm_end and vm_pgoff of a vma that is
 * already linked into its mm (and, for linear file mappings, into the
 * mapping's prio tree).
 *
 * @vma:    the vma to adjust
 * @start:  new vm_start
 * @end:    new vm_end
 * @pgoff:  new vm_pgoff
 * @insert: optional new vma to link in before the locks are dropped
 *          (used when splitting); when set, the neighbouring vma is
 *          never adjusted or removed.
 *
 * Without @insert, the new @end is compared with the next vma:
 *  - it reaches or passes next's end: next is removed (and the one
 *    after it too when remove_next == 2, via the "again" loop);
 *  - it lands inside next: next's start and pgoff move up;
 *  - it is below vma's current end: next's start and pgoff move down.
 *
 * The file mapping's i_mmap_lock is taken first, then the anon_vma
 * lock; both are released before any removed vma is freed.
 */
void vma_adjust(struct vm_area_struct *vma, unsigned long start,
	unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert)
{
	struct mm_struct *mm = vma->vm_mm;
	struct vm_area_struct *next = vma->vm_next;
	struct vm_area_struct *importer = NULL;
	struct address_space *mapping = NULL;
	struct prio_tree_root *root = NULL;
	struct file *file = vma->vm_file;
	struct anon_vma *anon_vma = NULL;
	long adjust_next = 0;
	int remove_next = 0;

	if (next && !insert) {
		if (end >= next->vm_end) {
			/*
			 * vma swallows next completely; remove_next becomes 2
			 * when the requested end lies beyond next as well.
			 * end is clamped to next's end for this pass.
			 */
again:			remove_next = 1 + (end > next->vm_end);
			end = next->vm_end;
			anon_vma = next->anon_vma;
		} else if (end > next->vm_start) {
			/*
			 * vma grows into the front of next: next loses
			 * adjust_next pages and vma imports its anon_vma.
			 */
			adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
			anon_vma = next->anon_vma;
			importer = vma;
		} else if (end < vma->vm_end) {
			/*
			 * vma shrinks: adjust_next is negative, so next's
			 * start and pgoff move down by that many pages below,
			 * and next is the importer.
			 */
			adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT);
			anon_vma = next->anon_vma;
			importer = next;
		}
	}

	if (file) {
		mapping = file->f_mapping;
		/* Only linear mappings live in the prio tree. */
		if (!(vma->vm_flags & VM_NONLINEAR))
			root = &mapping->i_mmap;
		spin_lock(&mapping->i_mmap_lock);
		if (insert) {
			/*
			 * Link the new vma into the file mapping now, while
			 * i_mmap_lock is held; it is linked into the mm
			 * further down by __insert_vm_struct().
			 */
			__vma_link_file(insert);
		}
	}

	/*
	 * vma's own anon_vma takes precedence over the neighbour's when
	 * choosing which anon_vma lock to hold across the adjustment.
	 */
	if (vma->anon_vma)
		anon_vma = vma->anon_vma;
	if (anon_vma) {
		spin_lock(&anon_vma->lock);
		/*
		 * The vma that gains pages must be attached to the anon_vma
		 * if it does not have one yet.
		 */
		if (importer && !importer->anon_vma) {
			importer->anon_vma = anon_vma;
			__anon_vma_link(importer);
		}
	}

	/* Take the affected vmas out of the prio tree while their keys change. */
	if (root) {
		flush_dcache_mmap_lock(mapping);
		vma_prio_tree_remove(vma, root);
		if (adjust_next)
			vma_prio_tree_remove(next, root);
	}

	vma->vm_start = start;
	vma->vm_end = end;
	vma->vm_pgoff = pgoff;
	if (adjust_next) {
		/* NOTE(review): shifts a possibly negative long; relies on the compiler's arithmetic behaviour. */
		next->vm_start += adjust_next << PAGE_SHIFT;
		next->vm_pgoff += adjust_next;
	}

	/* Re-insert with the updated ranges. */
	if (root) {
		if (adjust_next)
			vma_prio_tree_insert(next, root);
		vma_prio_tree_insert(vma, root);
		flush_dcache_mmap_unlock(mapping);
	}

	if (remove_next) {
		/*
		 * vma now covers next: unlink next from the mm, from the
		 * file mapping and merge its anon_vma linkage into vma.
		 */
		__vma_unlink(mm, next, vma);
		if (file)
			__remove_shared_vm_struct(next, file, mapping);
		if (next->anon_vma)
			__anon_vma_merge(vma, next);
	} else if (insert) {
		/*
		 * Splitting: vma has been shrunk above, so the new vma can
		 * now be linked into the mm without overlapping it.
		 */
		__insert_vm_struct(mm, insert);
	}

	if (anon_vma)
		spin_unlock(&anon_vma->lock);
	if (mapping)
		spin_unlock(&mapping->i_mmap_lock);

	if (remove_next) {
		/* Drop the file reference that the removed vma was holding. */
		if (file)
			fput(file);
		mm->map_count--;
		mpol_free(vma_policy(next));
		kmem_cache_free(vm_area_cachep, next);
		/*
		 * The requested end extended beyond the vma just removed:
		 * repeat the removal for the vma that now follows.
		 */
		if (remove_next == 2) {
			next = vma->vm_next;
			goto again;
		}
	}

	validate_mm(mm);
}
514
515
516
517
518
519#define VM_SPECIAL (VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED)
520
521static inline int is_mergeable_vma(struct vm_area_struct *vma,
522 struct file *file, unsigned long vm_flags)
523{
524 if (vma->vm_flags != vm_flags)
525 return 0;
526 if (vma->vm_file != file)
527 return 0;
528 if (vma->vm_ops && vma->vm_ops->close)
529 return 0;
530 return 1;
531}
532
/*
 * Two anon_vma pointers are compatible for merging when at least one
 * of them is NULL or when both refer to the same anon_vma.
 * Returns 1 if compatible, 0 otherwise.
 */
static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
					struct anon_vma *anon_vma2)
{
	if (!anon_vma1 || !anon_vma2)
		return 1;
	return anon_vma1 == anon_vma2;
}
538
539
540
541
542
543
544
545
546
547
548
549
550static int
551can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
552 struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
553{
554 if (is_mergeable_vma(vma, file, vm_flags) &&
555 is_mergeable_anon_vma(anon_vma, vma->anon_vma)) {
556 if (vma->vm_pgoff == vm_pgoff)
557 return 1;
558 }
559 return 0;
560}
561
562
563
564
565
566
567
568
569static int
570can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
571 struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
572{
573 if (is_mergeable_vma(vma, file, vm_flags) &&
574 is_mergeable_anon_vma(anon_vma, vma->anon_vma)) {
575 pgoff_t vm_pglen;
576 vm_pglen = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
577 if (vma->vm_pgoff + vm_pglen == vm_pgoff)
578 return 1;
579 }
580 return 0;
581}
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
/*
 * vma_merge - try to absorb the range [@addr, @end) with the given
 * attributes into a neighbouring vma instead of creating a new one.
 *
 * @prev is the vma preceding @addr (NULL if none).  "area" is the vma
 * following @prev; "next" is the same vma unless that vma ends exactly
 * at @end, in which case next is the one after it.
 *
 * Returns the vma that now covers the range, or NULL if no merge was
 * possible (including every range whose flags intersect VM_SPECIAL).
 */
struct vm_area_struct *vma_merge(struct mm_struct *mm,
			struct vm_area_struct *prev, unsigned long addr,
			unsigned long end, unsigned long vm_flags,
			struct anon_vma *anon_vma, struct file *file,
			pgoff_t pgoff, struct mempolicy *policy)
{
	pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
	struct vm_area_struct *area, *next;

	/* Special mappings are never merged. */
	if (vm_flags & VM_SPECIAL)
		return NULL;

	if (prev)
		next = prev->vm_next;
	else
		next = mm->mmap;
	area = next;
	/* The range ends where "area" ends: look one vma further for "next". */
	if (next && next->vm_end == end)
		next = next->vm_next;

	/*
	 * Can the range be appended to the predecessor?
	 */
	if (prev && prev->vm_end == addr &&
			mpol_equal(vma_policy(prev), policy) &&
			can_vma_merge_after(prev, vm_flags,
						anon_vma, file, pgoff)) {
		/*
		 * Yes.  Can it also be joined with the successor, so that
		 * prev, the range and next become a single vma?
		 */
		if (next && end == next->vm_start &&
				mpol_equal(policy, vma_policy(next)) &&
				can_vma_merge_before(next, vm_flags,
					anon_vma, file, pgoff+pglen) &&
				is_mergeable_anon_vma(prev->anon_vma,
						      next->anon_vma)) {
			/* Extend prev over the range and all of next. */
			vma_adjust(prev, prev->vm_start,
				next->vm_end, prev->vm_pgoff, NULL);
		} else	/* Extend prev over the range only. */
			vma_adjust(prev, prev->vm_start,
				end, prev->vm_pgoff, NULL);
		if (prev->vm_flags & VM_EXEC)
			arch_add_exec_range(mm, prev->vm_end);
		return prev;
	}

	/*
	 * Can the range be prepended to the successor?
	 */
	if (next && end == next->vm_start &&
			mpol_equal(policy, vma_policy(next)) &&
			can_vma_merge_before(next, vm_flags,
					anon_vma, file, pgoff+pglen)) {
		if (prev && addr < prev->vm_end)
			/* The range is the tail of prev: shrink prev to end at addr. */
			vma_adjust(prev, prev->vm_start,
				addr, prev->vm_pgoff, NULL);
		else
			/* Stretch area to span from addr to next's end. */
			vma_adjust(area, addr, next->vm_end,
				next->vm_pgoff - pglen, NULL);
		return area;
	}

	return NULL;
}
681
682
683
684
685
686
687
688
689
690struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
691{
692 struct vm_area_struct *near;
693 unsigned long vm_flags;
694
695 near = vma->vm_next;
696 if (!near)
697 goto try_prev;
698
699
700
701
702
703
704
705 vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
706 vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
707
708 if (near->anon_vma && vma->vm_end == near->vm_start &&
709 mpol_equal(vma_policy(vma), vma_policy(near)) &&
710 can_vma_merge_before(near, vm_flags,
711 NULL, vma->vm_file, vma->vm_pgoff +
712 ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)))
713 return near->anon_vma;
714try_prev:
715
716
717
718
719
720
721
722 if (find_vma_prev(vma->vm_mm, vma->vm_start, &near) != vma)
723 BUG();
724 if (!near)
725 goto none;
726
727 vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
728 vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
729
730 if (near->anon_vma && near->vm_end == vma->vm_start &&
731 mpol_equal(vma_policy(near), vma_policy(vma)) &&
732 can_vma_merge_after(near, vm_flags,
733 NULL, vma->vm_file, vma->vm_pgoff))
734 return near->anon_vma;
735none:
736
737
738
739
740
741
742
743
744 return NULL;
745}
746
747#ifdef CONFIG_PROC_FS
748void __vm_stat_account(struct mm_struct *mm, unsigned long flags,
749 struct file *file, long pages)
750{
751 const unsigned long stack_flags
752 = VM_STACK_FLAGS & (VM_GROWSUP|VM_GROWSDOWN);
753
754#ifdef CONFIG_HUGETLB
755 if (flags & VM_HUGETLB) {
756 if (!(flags & VM_DONTCOPY))
757 mm->shared_vm += pages;
758 return;
759 }
760#endif
761
762 if (file)
763 mm->shared_vm += pages;
764 else if (flags & stack_flags)
765 mm->stack_vm += pages;
766 if (flags & VM_EXEC)
767 mm->exec_vm += pages;
768 if (flags & (VM_RESERVED|VM_IO))
769 mm->reserved_vm += pages;
770}
771#endif
772
773
774
775
776
/*
 * do_mmap_pgoff - create a new mapping in the current process.
 *
 * @file:  file to map, or NULL for an anonymous mapping
 * @addr:  requested address (hint, or exact address with MAP_FIXED)
 * @len:   length in bytes; rounded up to a page multiple
 * @prot:  PROT_* protection bits
 * @flags: MAP_* flags
 * @pgoff: offset into @file, in pages
 *
 * Returns the start address of the mapping on success, or a negative
 * errno value cast to unsigned long on failure.  A zero @len returns
 * @addr unchanged without creating anything.
 *
 * The MAP_POPULATE and atime paths near the end drop and re-take
 * mm->mmap_sem for writing, so the caller is expected to hold it
 * (call sites are not visible here - confirm).
 */
unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
			unsigned long len, unsigned long prot,
			unsigned long flags, unsigned long pgoff)
{
	struct mm_struct * mm = current->mm;
	struct vm_area_struct * vma, * prev;
	struct inode *inode;
	unsigned int vm_flags;
	int correct_wcount = 0;
	int error;
	struct rb_node ** rb_link, * rb_parent;
	int accountable = 1;
	unsigned long charged = 0;

	if (file) {
		/* hugetlb files are excluded from the memory accounting below. */
		if (is_file_hugepages(file))
			accountable = 0;

		if (!file->f_op || !file->f_op->mmap)
			return -ENODEV;

		/* No executable mappings of files on a noexec mount. */
		if ((prot & PROT_EXEC) &&
		    (file->f_vfsmnt->mnt_flags & MNT_NOEXEC))
			return -EPERM;
	}

	/*
	 * With the READ_IMPLIES_EXEC personality a readable mapping is
	 * made executable as well, unless the file lives on a noexec
	 * mount.
	 */
	if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
		if (!(file && (file->f_vfsmnt->mnt_flags & MNT_NOEXEC)))
			prot |= PROT_EXEC;

	/* A zero-length request succeeds trivially. */
	if (!len)
		return addr;

	error = arch_mmap_check(addr, len, flags);
	if (error)
		return error;

	/* Round up to whole pages; a zero result means PAGE_ALIGN wrapped. */
	len = PAGE_ALIGN(len);
	if (!len || len > TASK_SIZE)
		return -EINVAL;

	/* The page offset of the end of the mapping must not wrap. */
	if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
		return -EINVAL;

	/* Too many mappings already? */
	if (mm->map_count > sysctl_max_map_count)
		return -ENOMEM;

	/*
	 * Pick (or validate) the address.  A result that is not page
	 * aligned is an error code and is passed straight back.
	 */
	addr = get_unmapped_area_prot(file, addr, len, pgoff, flags, prot & PROT_EXEC);
	if (addr & ~PAGE_MASK)
		return addr;

	/*
	 * Translate prot/flags into vm_flags, add the mm's default flags
	 * and start out with all VM_MAY* permissions; some are removed
	 * again below depending on how the file was opened.
	 */
	vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
			mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;

	if (flags & MAP_LOCKED) {
		if (!can_do_mlock())
			return -EPERM;
		vm_flags |= VM_LOCKED;
	}

	/* Locked mappings are charged against RLIMIT_MEMLOCK unless CAP_IPC_LOCK. */
	if (vm_flags & VM_LOCKED) {
		unsigned long locked, lock_limit;
		locked = mm->locked_vm << PAGE_SHIFT;
		lock_limit = current->rlim[RLIMIT_MEMLOCK].rlim_cur;
		locked += len;
		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
			return -EAGAIN;
	}

	inode = file ? file->f_dentry->d_inode : NULL;

	if (file) {
		switch (flags & MAP_TYPE) {
		case MAP_SHARED:
			/* A writable shared mapping needs a file opened for writing. */
			if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
				return -EACCES;

			/*
			 * An append-only inode may not be share-mapped through
			 * a descriptor that is open for writing.
			 */
			if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
				return -EACCES;

			/*
			 * Refuse when locks_verify_locked() reports a
			 * conflict on the inode.
			 */
			if (locks_verify_locked(inode))
				return -EAGAIN;

			vm_flags |= VM_SHARED | VM_MAYSHARE;
			/* Read-only descriptor: the mapping can never become writable. */
			if (!(file->f_mode & FMODE_WRITE))
				vm_flags &= ~(VM_MAYWRITE | VM_SHARED);

			/* fall through: shared mappings need read access too */
		case MAP_PRIVATE:
			if (!(file->f_mode & FMODE_READ))
				return -EACCES;
			break;

		default:
			return -EINVAL;
		}
	} else {
		switch (flags & MAP_TYPE) {
		case MAP_SHARED:
			vm_flags |= VM_SHARED | VM_MAYSHARE;
			break;
		case MAP_PRIVATE:
			/*
			 * Private anonymous mappings use their own virtual
			 * page number as pgoff.
			 */
			pgoff = addr >> PAGE_SHIFT;
			break;
		default:
			return -EINVAL;
		}
	}

	error = security_file_mmap(file, prot, flags);
	if (error)
		return error;

	/* Clear out anything already mapped in the target range. */
	error = -ENOMEM;
munmap_back:
	vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
	if (vma && vma->vm_start < addr + len) {
		if (do_munmap(mm, addr, len))
			return -ENOMEM;
		goto munmap_back;
	}

	/* Would the new mapping exceed the address-space resource limit? */
	if ((mm->total_vm << PAGE_SHIFT) + len
	    > current->rlim[RLIMIT_AS].rlim_cur)
		return -ENOMEM;

	if (accountable && (!(flags & MAP_NORESERVE) ||
			    sysctl_overcommit_memory == OVERCOMMIT_NEVER)) {
		if (vm_flags & VM_SHARED) {
			/*
			 * Shared: only flag it here; the flag is cleared
			 * again further down once ->mmap()/shmem setup ran.
			 */
			vm_flags |= VM_ACCOUNT;
		} else if (vm_flags & VM_WRITE) {
			/*
			 * Private writable mapping: charge the pages now and
			 * fail if the security hook says there is not enough
			 * memory.
			 */
			charged = len >> PAGE_SHIFT;
			if (security_vm_enough_memory(charged))
				return -ENOMEM;
			vm_flags |= VM_ACCOUNT;
		}
	}

	/*
	 * Private anonymous memory: try to extend a neighbouring vma
	 * before allocating a new one.  File and shared mappings are
	 * only merged after ->mmap()/shmem setup, further down.
	 */
	if (!file && !(vm_flags & VM_SHARED) &&
	    vma_merge(mm, prev, addr, addr + len, vm_flags,
					NULL, NULL, pgoff, NULL))
		goto out;

	/*
	 * No merge: allocate and initialise a fresh, zeroed vma for the
	 * range.
	 */
	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
	if (!vma) {
		error = -ENOMEM;
		goto unacct_error;
	}
	memset(vma, 0, sizeof(*vma));

	vma->vm_mm = mm;
	vma->vm_start = addr;
	vma->vm_end = addr + len;
	vma->vm_flags = vm_flags;
	vma->vm_page_prot = protection_map[vm_flags & 0x0f];
	vma->vm_pgoff = pgoff;

	if (file) {
		/* File mappings may not be growable. */
		error = -EINVAL;
		if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
			goto free_vma;
		if (vm_flags & VM_DENYWRITE) {
			error = deny_write_access(file);
			if (error)
				goto free_vma;
			/* Remember to undo deny_write_access() on every exit. */
			correct_wcount = 1;
		}
		vma->vm_file = file;
		get_file(file);
		error = file->f_op->mmap(file, vma);
		if (error)
			goto unmap_and_free_vma;
	} else if (vm_flags & VM_SHARED) {
		error = shmem_zero_setup(vma);
		if (error)
			goto free_vma;
	}

	/*
	 * VM_ACCOUNT was set on shared mappings above only for the
	 * duration of the setup calls; remove it from the vma again.
	 */
	if ((vm_flags & (VM_SHARED|VM_ACCOUNT)) == (VM_SHARED|VM_ACCOUNT))
		vma->vm_flags &= ~VM_ACCOUNT;

	/*
	 * Re-read start, pgoff and flags from the vma, since ->mmap()
	 * was handed the vma and may have modified them.
	 */
	addr = vma->vm_start;
	pgoff = vma->vm_pgoff;
	vm_flags = vma->vm_flags;

	if (!file || !vma_merge(mm, prev, addr, vma->vm_end,
			vma->vm_flags, NULL, file, pgoff, vma_policy(vma))) {
		/* Not merged: link the new vma in. */
		file = vma->vm_file;
		vma_link(mm, vma, prev, rb_link, rb_parent);
		if (correct_wcount)
			atomic_inc(&inode->i_writecount);
	} else {
		/* Merged into a neighbour: the temporary vma is not needed. */
		if (file) {
			if (correct_wcount)
				atomic_inc(&inode->i_writecount);
			fput(file);
		}
		mpol_free(vma_policy(vma));
		kmem_cache_free(vm_area_cachep, vma);
	}
out:
	mm->total_vm += len >> PAGE_SHIFT;
	__vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
	if (vm_flags & VM_LOCKED) {
		mm->locked_vm += len >> PAGE_SHIFT;
		make_pages_present(addr, addr + len);
	}
	if (flags & MAP_POPULATE) {
		/* sys_remap_file_pages() is called with mmap_sem released. */
		up_write(&mm->mmap_sem);
		sys_remap_file_pages(addr, len, 0,
					pgoff, flags & MAP_NONBLOCK);
		down_write(&mm->mmap_sem);
	}
	if (file && file->f_op &&
	    file->f_op->mmap == generic_file_noatime_mmap) {
		/* The atime update is done here, outside mmap_sem. */
		if (!(file->f_flags & O_NOATIME)) {
			up_write(&mm->mmap_sem);
			update_atime(file->f_dentry->d_inode);
			down_write(&mm->mmap_sem);
		}
	}
	return addr;

unmap_and_free_vma:
	if (correct_wcount)
		atomic_inc(&inode->i_writecount);
	vma->vm_file = NULL;
	fput(file);

	/* Undo any partial mapping done by a device driver. */
	zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
free_vma:
	kmem_cache_free(vm_area_cachep, vma);
unacct_error:
	if (charged)
		vm_unacct_memory(charged);
	return error;
}
1066
1067EXPORT_SYMBOL(do_mmap_pgoff);
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080#ifndef HAVE_ARCH_UNMAPPED_AREA
/*
 * arch_get_unmapped_area - generic bottom-up search for a free range of
 * @len bytes in the current mm.
 *
 * A non-zero @addr is treated as a hint: it is page aligned and used
 * if the range fits below TASK_SIZE and does not reach into the next
 * vma.  Otherwise the search starts at mm->free_area_cache (or at
 * mm->mmap_base with the legacy layout sysctl on x86) and walks the vma
 * list upwards; if it runs out of address space it restarts once from
 * TASK_UNMAPPED_BASE.
 *
 * Returns the chosen address, or -ENOMEM.  On success via the search,
 * mm->free_area_cache is advanced to the end of the returned range.
 */
unsigned long
arch_get_unmapped_area(struct file *filp, unsigned long addr,
		unsigned long len, unsigned long pgoff, unsigned long flags)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long start_addr;

	if (len > TASK_SIZE)
		return -ENOMEM;

	if (addr) {
		addr = PAGE_ALIGN(addr);
		vma = find_vma(mm, addr);
		if (TASK_SIZE - len >= addr &&
		    (!vma || addr + len <= vma->vm_start))
			return addr;
	}

	/* Note: the #ifdef leaves a dangling "else" that binds to the next statement. */
#ifdef CONFIG_X86
	if (sysctl_legacy_va_layout)
		start_addr = addr = mm->mmap_base;
	else
#endif
	start_addr = addr = mm->free_area_cache;

full_search:
	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
		/* At this point:  (!vma || addr < vma->vm_end). */
		if (TASK_SIZE - len < addr) {
			/*
			 * Ran off the top.  If the search did not begin at
			 * TASK_UNMAPPED_BASE, start over from there once.
			 */
			if (start_addr != TASK_UNMAPPED_BASE) {
				start_addr = addr = TASK_UNMAPPED_BASE;
				goto full_search;
			}
			return -ENOMEM;
		}
		if (!vma || addr + len <= vma->vm_start) {
			/*
			 * Found a hole; remember where it ends so the next
			 * search can start there.
			 */
			mm->free_area_cache = addr + len;
			return addr;
		}
		addr = vma->vm_end;
	}
}
1131#endif
1132
1133void arch_unmap_area(struct vm_area_struct *area)
1134{
1135
1136
1137
1138 if (area->vm_start >= TASK_UNMAPPED_BASE &&
1139 area->vm_start < area->vm_mm->free_area_cache)
1140 area->vm_mm->free_area_cache = area->vm_start;
1141}
1142
1143
1144
1145
1146
1147#ifndef HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
/*
 * arch_get_unmapped_area_topdown - generic top-down search for a free
 * range of @len bytes, working downwards from mm->mmap_base.
 *
 * A non-zero @addr0 is honoured as a hint if the page-aligned range is
 * free.  Otherwise the search starts just below mm->free_area_cache and
 * steps down vma by vma.  On failure it is retried once from the base,
 * and as a last resort the bottom-up arch_get_unmapped_area() is used
 * with free_area_cache temporarily set to TASK_UNMAPPED_BASE.
 *
 * Returns the chosen address or whatever the bottom-up fallback returns.
 */
unsigned long
arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
			  const unsigned long len, const unsigned long pgoff,
			  const unsigned long flags)
{
	struct vm_area_struct *vma, *prev_vma;
	struct mm_struct *mm = current->mm;
	unsigned long base = mm->mmap_base, addr = addr0;
	int first_time = 1;

	/* requested length too big for entire address space */
	if (len > TASK_SIZE)
		return -ENOMEM;

	/* the cache must never point above the top-down base */
	if (mm->free_area_cache > base)
		mm->free_area_cache = base;

	/* requesting a specific address */
	if (addr) {
		addr = PAGE_ALIGN(addr);
		vma = find_vma(mm, addr);
		if (TASK_SIZE - len >= addr &&
				(!vma || addr + len <= vma->vm_start))
			return addr;
	}

#ifdef CONFIG_X86
	/* legacy layout requested: skip the first top-down pass */
	if (sysctl_legacy_va_layout)
		goto fail;
#endif

try_again:
	/* make sure the subtraction below cannot underflow */
	if (mm->free_area_cache < len)
		goto fail;

	/* first candidate: the range ending at the cached address */
	addr = (mm->free_area_cache - len) & PAGE_MASK;
	do {
		/*
		 * No vma at or above addr at all: the range is free.
		 * prev_vma receives the vma preceding the one found.
		 */
		if (!(vma = find_vma_prev(mm, addr, &prev_vma)))
			return addr;

		/*
		 * The candidate fits if it ends at or before the vma above
		 * it and starts at or after the end of the vma below it.
		 */
		if (addr && addr+len <= vma->vm_start &&
				(!prev_vma || (addr >= prev_vma->vm_end)))
			/* remember the address as the next search start */
			return (mm->free_area_cache = addr);
		else
			/* the cache sat right on top of this vma: move it below */
			if (mm->free_area_cache == vma->vm_end)
				mm->free_area_cache = vma->vm_start;

		/* try just below the current vma */
		addr = vma->vm_start-len;
	} while (len < vma->vm_start);

fail:
	/*
	 * First failure: reset the cache to the base and run the
	 * top-down search once more over the whole range.
	 */
	if (first_time) {
		mm->free_area_cache = base;
		first_time = 0;
		goto try_again;
	}
	/*
	 * Still nothing: fall back to the bottom-up allocator, starting
	 * from TASK_UNMAPPED_BASE.
	 */
	mm->free_area_cache = TASK_UNMAPPED_BASE;
	addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
	/*
	 * Restore the top-down cache to the base, overriding whatever the
	 * bottom-up search stored there.
	 */
	mm->free_area_cache = base;

	return addr;
}
1237#endif
1238
1239void arch_unmap_area_topdown(struct vm_area_struct *area)
1240{
1241
1242
1243
1244 if (area->vm_end > area->vm_mm->free_area_cache)
1245 area->vm_mm->free_area_cache = area->vm_end;
1246}
1247
1248
1249unsigned long
1250get_unmapped_area_prot(struct file *file, unsigned long addr, unsigned long len,
1251 unsigned long pgoff, unsigned long flags, int exec)
1252{
1253 if (flags & MAP_FIXED) {
1254 unsigned long ret;
1255
1256 if (addr > TASK_SIZE - len)
1257 return -ENOMEM;
1258 if (addr & ~PAGE_MASK)
1259 return -EINVAL;
1260 if (file && is_file_hugepages(file)) {
1261
1262
1263
1264
1265 ret = prepare_hugepage_range(addr, len);
1266 } else {
1267
1268
1269
1270
1271
1272 ret = is_hugepage_only_range(addr, len);
1273 }
1274 if (ret)
1275 return -EINVAL;
1276 return addr;
1277 }
1278
1279 if (file && file->f_op && file->f_op->get_unmapped_area)
1280 return file->f_op->get_unmapped_area(file, addr, len,
1281 pgoff, flags);
1282
1283 if (exec && current->mm->get_unmapped_exec_area)
1284 return current->mm->get_unmapped_exec_area(file, addr, len, pgoff, flags);
1285 else
1286 return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
1287}
1288
1289EXPORT_SYMBOL(get_unmapped_area_prot);
1290
1291
/* Lowest address at which arch_get_unmapped_exec_area() places mappings. */
#define SHLIB_BASE 0x00111000

/*
 * arch_get_unmapped_exec_area - pick an address for an executable
 * mapping, preferring low addresses below the heap.
 *
 * With no hint (and no MAP_FIXED) a random address between SHLIB_BASE
 * and 0x01000000 is tried first.  If the hinted or random address is
 * not free, the vma list is scanned upwards from SHLIB_BASE for the
 * first hole.  A hole that would extend past mm->brk is rejected and
 * the request is handed to the mm's regular get_unmapped_area().  For
 * holes at or above 0x01000000 one more randomised placement between
 * 0x01000000 and max(mm->start_brk, 0x08000000) is attempted before
 * settling for the hole found by the scan.
 *
 * Returns the chosen address, -ENOMEM, or the fallback's result.
 */
unsigned long arch_get_unmapped_exec_area(struct file *filp, unsigned long addr0,
		unsigned long len0, unsigned long pgoff, unsigned long flags)
{
	unsigned long addr = addr0, len = len0;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long tmp;

	if (len > TASK_SIZE)
		return -ENOMEM;

	/* No hint given: start from a random address in the low range. */
	if (!addr && !(flags & MAP_FIXED))
		addr = randomize_range(SHLIB_BASE, 0x01000000, len);

	if (addr) {
		addr = PAGE_ALIGN(addr);
		vma = find_vma(mm, addr);
		if (TASK_SIZE - len >= addr &&
		    (!vma || addr + len <= vma->vm_start)) {
			return addr;
		}
	}

	/* The hint was unusable: scan linearly from the bottom. */
	addr = SHLIB_BASE;

	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
		/* At this point:  (!vma || addr < vma->vm_end). */
		if (TASK_SIZE - len < addr) {
			return -ENOMEM;
		}
		if (!vma || addr + len <= vma->vm_start) {
			/*
			 * The hole must lie entirely below the current
			 * break; otherwise use the regular allocator.
			 */
			if (addr + len > mm->brk)
				goto failed;

			/*
			 * The hole is above the low 16MB: try one random
			 * placement in the range up to the start of the heap
			 * (but at least up to 0x08000000) first.
			 */
			if (addr >= 0x01000000) {
				/* NOTE(review): max() is given an unsigned long and an int constant; confirm the macro accepts mixed types. */
				tmp = randomize_range(0x01000000, PAGE_ALIGN(max(mm->start_brk, 0x08000000)), len);
				vma = find_vma(mm, tmp);
				if (TASK_SIZE - len >= tmp &&
				    (!vma || tmp + len <= vma->vm_start))
					return tmp;
			}
			/*
			 * Random placement not attempted or not free: use
			 * the hole found by the linear scan.
			 */
			return addr;
		}
		addr = vma->vm_end;
	}

failed:
	return current->mm->get_unmapped_area(filp, addr0, len0, pgoff, flags);
}
1355
1356
1357
1358
1359struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr)
1360{
1361 struct vm_area_struct *vma = NULL;
1362
1363 if (mm) {
1364
1365
1366 vma = mm->mmap_cache;
1367 if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
1368 struct rb_node * rb_node;
1369
1370 rb_node = mm->mm_rb.rb_node;
1371 vma = NULL;
1372
1373 while (rb_node) {
1374 struct vm_area_struct * vma_tmp;
1375
1376 vma_tmp = rb_entry(rb_node,
1377 struct vm_area_struct, vm_rb);
1378
1379 if (vma_tmp->vm_end > addr) {
1380 vma = vma_tmp;
1381 if (vma_tmp->vm_start <= addr)
1382 break;
1383 rb_node = rb_node->rb_left;
1384 } else
1385 rb_node = rb_node->rb_right;
1386 }
1387 if (vma)
1388 mm->mmap_cache = vma;
1389 }
1390 }
1391 return vma;
1392}
1393
1394EXPORT_SYMBOL(find_vma);
1395
1396
1397struct vm_area_struct *
1398find_vma_prev(struct mm_struct *mm, unsigned long addr,
1399 struct vm_area_struct **pprev)
1400{
1401 struct vm_area_struct *vma = NULL, *prev = NULL;
1402 struct rb_node * rb_node;
1403 if (!mm)
1404 goto out;
1405
1406
1407 vma = mm->mmap;
1408
1409
1410 rb_node = mm->mm_rb.rb_node;
1411
1412 while (rb_node) {
1413 struct vm_area_struct *vma_tmp;
1414 vma_tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);
1415
1416 if (addr < vma_tmp->vm_end) {
1417 rb_node = rb_node->rb_left;
1418 } else {
1419 prev = vma_tmp;
1420 if (!prev->vm_next || (addr < prev->vm_next->vm_end))
1421 break;
1422 rb_node = rb_node->rb_right;
1423 }
1424 }
1425
1426out:
1427 *pprev = prev;
1428 return prev ? prev->vm_next : vma;
1429}
1430
1431
1432static int over_stack_limit(unsigned long sz)
1433{
1434 if (sz < EXEC_STACK_BIAS)
1435 return 0;
1436 return (sz - EXEC_STACK_BIAS) > current->rlim[RLIMIT_STACK].rlim_cur;
1437}
1438
1439#ifdef CONFIG_STACK_GROWSUP
1440
1441
1442
1443int expand_stack(struct vm_area_struct * vma, unsigned long address)
1444{
1445 unsigned long grow;
1446 unsigned long size;
1447
1448 if (!(vma->vm_flags & VM_GROWSUP))
1449 return -EFAULT;
1450
1451
1452
1453
1454
1455 if (unlikely(anon_vma_prepare(vma)))
1456 return -ENOMEM;
1457 anon_vma_lock(vma);
1458
1459
1460
1461
1462
1463
1464 address += 4 + PAGE_SIZE - 1;
1465 address &= PAGE_MASK;
1466 size = address - vma->vm_start;
1467 grow = (address - vma->vm_end) >> PAGE_SHIFT;
1468
1469
1470 if (address <= vma->vm_end) {
1471 anon_vma_unlock(vma);
1472 return 0;
1473 }
1474
1475
1476 if (is_hugepage_only_range(vma->vm_start, size)) {
1477 anon_vma_unlock(vma);
1478 return -EFAULT;
1479 }
1480
1481
1482 if (security_vm_enough_memory(grow)) {
1483 anon_vma_unlock(vma);
1484 return -ENOMEM;
1485 }
1486
1487 if (over_stack_limit(address - vma->vm_start) ||
1488 ((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) >
1489 current->rlim[RLIMIT_AS].rlim_cur) {
1490 anon_vma_unlock(vma);
1491 vm_unacct_memory(grow);
1492 return -ENOMEM;
1493 }
1494 if ((vma->vm_flags & VM_LOCKED) && !capable(CAP_IPC_LOCK) &&
1495 ((vma->vm_mm->locked_vm + grow) << PAGE_SHIFT) >
1496 current->rlim[RLIMIT_MEMLOCK].rlim_cur) {
1497 anon_vma_unlock(vma);
1498 vm_unacct_memory(grow);
1499 return -ENOMEM;
1500 }
1501 vma->vm_end = address;
1502 vma->vm_mm->total_vm += grow;
1503 if (vma->vm_flags & VM_LOCKED)
1504 vma->vm_mm->locked_vm += grow;
1505 __vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, grow);
1506 anon_vma_unlock(vma);
1507 return 0;
1508}
1509
1510struct vm_area_struct *
1511find_extend_vma(struct mm_struct *mm, unsigned long addr)
1512{
1513 struct vm_area_struct *vma, *prev;
1514
1515 addr &= PAGE_MASK;
1516 vma = find_vma_prev(mm, addr, &prev);
1517 if (vma && (vma->vm_start <= addr))
1518 return vma;
1519 if (!prev || expand_stack(prev, addr))
1520 return NULL;
1521 if (prev->vm_flags & VM_LOCKED) {
1522 make_pages_present(addr, prev->vm_end);
1523 }
1524 return prev;
1525}
1526#else
1527
1528
1529
1530int expand_stack(struct vm_area_struct *vma, unsigned long address)
1531{
1532 unsigned long grow;
1533 unsigned long size;
1534
1535
1536
1537
1538
1539 if (unlikely(anon_vma_prepare(vma)))
1540 return -ENOMEM;
1541 anon_vma_lock(vma);
1542
1543
1544
1545
1546
1547
1548 address &= PAGE_MASK;
1549 size = vma->vm_end - address;
1550 grow = (vma->vm_start - address) >> PAGE_SHIFT;
1551
1552
1553 if (address >= vma->vm_start) {
1554 anon_vma_unlock(vma);
1555 return 0;
1556 }
1557
1558
1559 if (is_hugepage_only_range(vma->vm_end - size, size)) {
1560 anon_vma_unlock(vma);
1561 return -EFAULT;
1562 }
1563
1564
1565 if (security_vm_enough_memory(grow)) {
1566 anon_vma_unlock(vma);
1567 return -ENOMEM;
1568 }
1569
1570 if (over_stack_limit(vma->vm_end - address) ||
1571 ((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) >
1572 current->rlim[RLIMIT_AS].rlim_cur) {
1573 anon_vma_unlock(vma);
1574 vm_unacct_memory(grow);
1575 return -ENOMEM;
1576 }
1577 if ((vma->vm_flags & VM_LOCKED) && !capable(CAP_IPC_LOCK) &&
1578 ((vma->vm_mm->locked_vm + grow) << PAGE_SHIFT) >
1579 current->rlim[RLIMIT_MEMLOCK].rlim_cur) {
1580 anon_vma_unlock(vma);
1581 vm_unacct_memory(grow);
1582 return -ENOMEM;
1583 }
1584 vma->vm_start = address;
1585 vma->vm_pgoff -= grow;
1586 vma->vm_mm->total_vm += grow;
1587 if (vma->vm_flags & VM_LOCKED)
1588 vma->vm_mm->locked_vm += grow;
1589 __vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, grow);
1590 anon_vma_unlock(vma);
1591 return 0;
1592}
1593
1594struct vm_area_struct *
1595find_extend_vma(struct mm_struct * mm, unsigned long addr)
1596{
1597 struct vm_area_struct * vma;
1598 unsigned long start;
1599
1600 addr &= PAGE_MASK;
1601 vma = find_vma(mm,addr);
1602 if (!vma)
1603 return NULL;
1604 if (vma->vm_start <= addr)
1605 return vma;
1606 if (!(vma->vm_flags & VM_GROWSDOWN))
1607 return NULL;
1608 start = vma->vm_start;
1609 if (expand_stack(vma, addr))
1610 return NULL;
1611 if (vma->vm_flags & VM_LOCKED) {
1612 make_pages_present(addr, start);
1613 }
1614 return vma;
1615}
1616#endif
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631static void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *prev,
1632 unsigned long start, unsigned long end)
1633{
1634 unsigned long first = start & PGDIR_MASK;
1635 unsigned long last = end + PGDIR_SIZE - 1;
1636 unsigned long start_index, end_index;
1637 struct mm_struct *mm = tlb->mm;
1638
1639 if (!prev) {
1640 prev = mm->mmap;
1641 if (!prev)
1642 goto no_mmaps;
1643 if (prev->vm_end > start) {
1644 if (last > prev->vm_start)
1645 last = prev->vm_start;
1646 goto no_mmaps;
1647 }
1648 }
1649 for (;;) {
1650 struct vm_area_struct *next = prev->vm_next;
1651
1652 if (next) {
1653 if (next->vm_start < start) {
1654 prev = next;
1655 continue;
1656 }
1657 if (last > next->vm_start)
1658 last = next->vm_start;
1659 }
1660 if (prev->vm_end > first)
1661 first = prev->vm_end + PGDIR_SIZE - 1;
1662 break;
1663 }
1664no_mmaps:
1665 if (last < first)
1666 return;
1667
1668
1669
1670
1671 start_index = pgd_index(first);
1672 if (start_index < FIRST_USER_PGD_NR)
1673 start_index = FIRST_USER_PGD_NR;
1674 end_index = pgd_index(last);
1675 if (end_index > start_index) {
1676 clear_page_tables(tlb, start_index, end_index - start_index);
1677 flush_tlb_pgtables(mm, first & PGDIR_MASK, last & PGDIR_MASK);
1678 }
1679}
1680
1681
1682
1683
1684
1685
1686
1687
1688static void unmap_vma(struct mm_struct *mm, struct vm_area_struct *area)
1689{
1690 size_t len = area->vm_end - area->vm_start;
1691
1692 area->vm_mm->total_vm -= len >> PAGE_SHIFT;
1693 if (area->vm_flags & VM_LOCKED)
1694 area->vm_mm->locked_vm -= len >> PAGE_SHIFT;
1695 vm_stat_unaccount(area);
1696 area->vm_mm->unmap_area(area);
1697 remove_vm_struct(area);
1698}
1699
1700
1701
1702
1703
1704
1705
1706static void unmap_vma_list(struct mm_struct *mm,
1707 struct vm_area_struct *mpnt)
1708{
1709 do {
1710 struct vm_area_struct *next = mpnt->vm_next;
1711 unmap_vma(mm, mpnt);
1712 mpnt = next;
1713 } while (mpnt != NULL);
1714 validate_mm(mm);
1715}
1716
1717
1718
1719
1720
1721
1722static void unmap_region(struct mm_struct *mm,
1723 struct vm_area_struct *vma,
1724 struct vm_area_struct *prev,
1725 unsigned long start,
1726 unsigned long end)
1727{
1728 struct mmu_gather *tlb;
1729 unsigned long nr_accounted = 0;
1730
1731 lru_add_drain();
1732 tlb = tlb_gather_mmu(mm, 0);
1733 unmap_vmas(&tlb, mm, vma, start, end, &nr_accounted, NULL);
1734 vm_unacct_memory(nr_accounted);
1735
1736 if (is_hugepage_only_range(start, end - start))
1737 hugetlb_free_pgtables(tlb, prev, start, end);
1738 else
1739 free_pgtables(tlb, prev, start, end);
1740 tlb_finish_mmu(tlb, start, end);
1741}
1742
1743
1744
1745
1746
1747static void
1748detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
1749 struct vm_area_struct *prev, unsigned long end)
1750{
1751 struct vm_area_struct **insertion_point;
1752 struct vm_area_struct *tail_vma = NULL;
1753
1754 insertion_point = (prev ? &prev->vm_next : &mm->mmap);
1755 do {
1756 rb_erase(&vma->vm_rb, &mm->mm_rb);
1757 mm->map_count--;
1758 tail_vma = vma;
1759 vma = vma->vm_next;
1760 } while (vma && vma->vm_start < end);
1761 *insertion_point = vma;
1762 tail_vma->vm_next = NULL;
1763 mm->mmap_cache = NULL;
1764}
1765
1766
1767
1768
1769
1770int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
1771 unsigned long addr, int new_below)
1772{
1773 struct mempolicy *pol;
1774 struct vm_area_struct *new;
1775
1776 if (is_vm_hugetlb_page(vma) && (addr & ~HPAGE_MASK))
1777 return -EINVAL;
1778
1779 if (mm->map_count >= sysctl_max_map_count)
1780 return -ENOMEM;
1781
1782 new = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
1783 if (!new)
1784 return -ENOMEM;
1785
1786
1787 *new = *vma;
1788
1789 if (new_below)
1790 new->vm_end = addr;
1791 else {
1792 new->vm_start = addr;
1793 new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
1794 }
1795
1796 pol = mpol_copy(vma_policy(vma));
1797 if (IS_ERR(pol)) {
1798 kmem_cache_free(vm_area_cachep, new);
1799 return PTR_ERR(pol);
1800 }
1801 vma_set_policy(new, pol);
1802
1803 if (new->vm_file)
1804 get_file(new->vm_file);
1805
1806 if (new->vm_ops && new->vm_ops->open)
1807 new->vm_ops->open(new);
1808
1809 if (new_below) {
1810 unsigned long old_end = vma->vm_end;
1811
1812 vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
1813 ((addr - new->vm_start) >> PAGE_SHIFT), new);
1814 if (vma->vm_flags & VM_EXEC)
1815 arch_remove_exec_range(mm, old_end);
1816 } else
1817 vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
1818
1819 return 0;
1820}
1821
1822
1823
1824
1825
1826
1827int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
1828{
1829 unsigned long end;
1830 struct vm_area_struct *mpnt, *prev, *last;
1831
1832 if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start)
1833 return -EINVAL;
1834
1835 if ((len = PAGE_ALIGN(len)) == 0)
1836 return -EINVAL;
1837
1838
1839 mpnt = find_vma_prev(mm, start, &prev);
1840 if (!mpnt)
1841 return 0;
1842
1843
1844
1845 end = start + len;
1846 if (mpnt->vm_start >= end)
1847 return 0;
1848
1849
1850
1851
1852
1853
1854
1855
1856 if (start > mpnt->vm_start) {
1857 int error = split_vma(mm, mpnt, start, 0);
1858 if (error)
1859 return error;
1860 prev = mpnt;
1861 }
1862
1863
1864 last = find_vma(mm, end);
1865 if (last && end > last->vm_start) {
1866 int error = split_vma(mm, last, end, 1);
1867 if (error)
1868 return error;
1869 }
1870 mpnt = prev? prev->vm_next: mm->mmap;
1871
1872
1873
1874
1875 detach_vmas_to_be_unmapped(mm, mpnt, prev, end);
1876 spin_lock(&mm->page_table_lock);
1877 unmap_region(mm, mpnt, prev, start, end);
1878 spin_unlock(&mm->page_table_lock);
1879
1880
1881 unmap_vma_list(mm, mpnt);
1882
1883 return 0;
1884}
1885
1886EXPORT_SYMBOL(do_munmap);
1887
1888asmlinkage long sys_munmap(unsigned long addr, size_t len)
1889{
1890 int ret;
1891 struct mm_struct *mm = current->mm;
1892
1893 profile_munmap(addr);
1894
1895 down_write(&mm->mmap_sem);
1896 ret = do_munmap(mm, addr, len);
1897 up_write(&mm->mmap_sem);
1898 return ret;
1899}
1900
1901
1902
1903
1904
1905
1906unsigned long do_brk(unsigned long addr, unsigned long len)
1907{
1908 struct mm_struct * mm = current->mm;
1909 struct vm_area_struct * vma, * prev;
1910 unsigned long flags;
1911 struct rb_node ** rb_link, * rb_parent;
1912 pgoff_t pgoff = addr >> PAGE_SHIFT;
1913 int error;
1914
1915 len = PAGE_ALIGN(len);
1916 if (!len)
1917 return addr;
1918
1919 if ((addr + len) > TASK_SIZE || (addr + len) < addr)
1920 return -EINVAL;
1921
1922 flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
1923
1924 error = arch_mmap_check(addr, len, flags);
1925 if (error)
1926 return error;
1927
1928
1929
1930
1931 if (mm->def_flags & VM_LOCKED) {
1932 unsigned long locked, lock_limit;
1933 locked = mm->locked_vm << PAGE_SHIFT;
1934 lock_limit = current->rlim[RLIMIT_MEMLOCK].rlim_cur;
1935 locked += len;
1936 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
1937 return -EAGAIN;
1938 }
1939
1940
1941
1942
1943 munmap_back:
1944 vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
1945 if (vma && vma->vm_start < addr + len) {
1946 if (do_munmap(mm, addr, len))
1947 return -ENOMEM;
1948 goto munmap_back;
1949 }
1950
1951
1952 if ((mm->total_vm << PAGE_SHIFT) + len
1953 > current->rlim[RLIMIT_AS].rlim_cur)
1954 return -ENOMEM;
1955
1956 if (mm->map_count > sysctl_max_map_count)
1957 return -ENOMEM;
1958
1959 if (security_vm_enough_memory(len >> PAGE_SHIFT))
1960 return -ENOMEM;
1961
1962
1963 if (vma_merge(mm, prev, addr, addr + len, flags,
1964 NULL, NULL, pgoff, NULL))
1965 goto out;
1966
1967
1968
1969
1970 vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
1971 if (!vma) {
1972 vm_unacct_memory(len >> PAGE_SHIFT);
1973 return -ENOMEM;
1974 }
1975 memset(vma, 0, sizeof(*vma));
1976
1977 vma->vm_mm = mm;
1978 vma->vm_start = addr;
1979 vma->vm_end = addr + len;
1980 vma->vm_pgoff = pgoff;
1981 vma->vm_flags = flags;
1982 vma->vm_page_prot = protection_map[flags & 0x0f];
1983 vma_link(mm, vma, prev, rb_link, rb_parent);
1984out:
1985 mm->total_vm += len >> PAGE_SHIFT;
1986 if (flags & VM_LOCKED) {
1987 mm->locked_vm += len >> PAGE_SHIFT;
1988 make_pages_present(addr, addr + len);
1989 }
1990 return addr;
1991}
1992
1993EXPORT_SYMBOL(do_brk);
1994
1995
1996unsigned long do_brk_locked(unsigned long addr, unsigned long len)
1997{
1998 unsigned long ret;
1999
2000 down_write(¤t->mm->mmap_sem);
2001 ret = do_brk(addr, len);
2002 up_write(¤t->mm->mmap_sem);
2003
2004 return ret;
2005}
2006
2007EXPORT_SYMBOL(do_brk_locked);
2008
2009
2010void exit_mmap(struct mm_struct *mm)
2011{
2012 struct mmu_gather *tlb;
2013 struct vm_area_struct *vma;
2014 unsigned long nr_accounted = 0;
2015
2016#ifdef arch_exit_mmap
2017 arch_exit_mmap(mm);
2018#endif
2019
2020 lru_add_drain();
2021
2022 spin_lock(&mm->page_table_lock);
2023
2024 tlb = tlb_gather_mmu(mm, 1);
2025 flush_cache_mm(mm);
2026
2027 mm->map_count -= unmap_vmas(&tlb, mm, mm->mmap, 0,
2028 ~0UL, &nr_accounted, NULL);
2029 vm_unacct_memory(nr_accounted);
2030 BUG_ON(mm->map_count);
2031 clear_page_tables(tlb, FIRST_USER_PGD_NR, USER_PTRS_PER_PGD);
2032 tlb_finish_mmu(tlb, 0, MM_VM_SIZE(mm));
2033
2034 vma = mm->mmap;
2035 mm->mmap = mm->mmap_cache = NULL;
2036 mm->mm_rb = RB_ROOT;
2037 mm->rss = 0;
2038 mm->total_vm = 0;
2039 mm->locked_vm = 0;
2040 arch_flush_exec_range(mm);
2041
2042 spin_unlock(&mm->page_table_lock);
2043
2044
2045
2046
2047
2048 while (vma) {
2049 struct vm_area_struct *next = vma->vm_next;
2050 remove_vm_struct(vma);
2051 vma = next;
2052 }
2053}
2054
2055
2056
2057
2058
2059int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
2060{
2061 struct vm_area_struct * __vma, * prev;
2062 struct rb_node ** rb_link, * rb_parent;
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076 if (!vma->vm_file) {
2077 BUG_ON(vma->anon_vma);
2078 vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
2079 }
2080 __vma = find_vma_prepare(mm,vma->vm_start,&prev,&rb_link,&rb_parent);
2081 if (__vma && __vma->vm_start < vma->vm_end)
2082 return -ENOMEM;
2083 if ((vma->vm_flags & VM_ACCOUNT) &&
2084 security_vm_enough_memory(vma_pages(vma)))
2085 return -ENOMEM;
2086 vma_link(mm, vma, prev, rb_link, rb_parent);
2087 return 0;
2088}
2089
2090
2091
2092
2093
2094struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
2095 unsigned long addr, unsigned long len, pgoff_t pgoff)
2096{
2097 struct vm_area_struct *vma = *vmap;
2098 unsigned long vma_start = vma->vm_start;
2099 struct mm_struct *mm = vma->vm_mm;
2100 struct vm_area_struct *new_vma, *prev;
2101 struct rb_node **rb_link, *rb_parent;
2102 struct mempolicy *pol;
2103
2104
2105
2106
2107
2108 if (!vma->vm_file && !vma->anon_vma)
2109 pgoff = addr >> PAGE_SHIFT;
2110
2111 find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
2112 new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
2113 vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
2114 if (new_vma) {
2115
2116
2117
2118 if (vma_start >= new_vma->vm_start &&
2119 vma_start < new_vma->vm_end)
2120 *vmap = new_vma;
2121 } else {
2122 new_vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
2123 if (new_vma) {
2124 *new_vma = *vma;
2125 pol = mpol_copy(vma_policy(vma));
2126 if (IS_ERR(pol)) {
2127 kmem_cache_free(vm_area_cachep, new_vma);
2128 return NULL;
2129 }
2130 vma_set_policy(new_vma, pol);
2131 new_vma->vm_start = addr;
2132 new_vma->vm_end = addr + len;
2133 new_vma->vm_pgoff = pgoff;
2134 if (new_vma->vm_file)
2135 get_file(new_vma->vm_file);
2136 if (new_vma->vm_ops && new_vma->vm_ops->open)
2137 new_vma->vm_ops->open(new_vma);
2138 vma_link(mm, new_vma, prev, rb_link, rb_parent);
2139 }
2140 }
2141 return new_vma;
2142}
2143