1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62#include <linux/mempolicy.h>
63#include <linux/mm.h>
64#include <linux/highmem.h>
65#include <linux/hugetlb.h>
66#include <linux/kernel.h>
67#include <linux/sched.h>
68#include <linux/mm.h>
69#include <linux/gfp.h>
70#include <linux/slab.h>
71#include <linux/string.h>
72#include <linux/module.h>
73#include <linux/interrupt.h>
74#include <linux/init.h>
75#include <linux/compat.h>
76#include <linux/mempolicy.h>
77#include <asm/uaccess.h>
78
/*
 * Slab caches created in numa_policy_init(): policy_cache backs
 * struct mempolicy objects, sn_cache backs struct sp_node objects.
 */
static kmem_cache_t *policy_cache;
static kmem_cache_t *sn_cache;

/* Debug print hook; it expands to nothing, so its arguments are discarded. */
#define PDprintk(fmt...)



/*
 * Highest zone index recorded by bind_zonelist() so far; zonelist_policy()
 * compares its gfp argument against this value for MPOL_BIND.
 */
static int policy_zone;

/* Policy substituted whenever a lookup finds no explicit policy. */
static struct mempolicy default_policy = {
	.refcnt = ATOMIC_INIT(1),
	.policy = MPOL_DEFAULT,
};
92
93
94static int nodes_online(unsigned long *nodes)
95{
96 DECLARE_BITMAP(online2, MAX_NUMNODES);
97
98 bitmap_copy(online2, node_online_map, MAX_NUMNODES);
99 if (bitmap_empty(online2, MAX_NUMNODES))
100 set_bit(0, online2);
101 if (!bitmap_subset(nodes, online2, MAX_NUMNODES))
102 return -EINVAL;
103 return 0;
104}
105
106
107static int mpol_check_policy(int mode, unsigned long *nodes)
108{
109 int empty = bitmap_empty(nodes, MAX_NUMNODES);
110
111 switch (mode) {
112 case MPOL_DEFAULT:
113 if (!empty)
114 return -EINVAL;
115 break;
116 case MPOL_BIND:
117 case MPOL_INTERLEAVE:
118
119
120 if (empty)
121 return -EINVAL;
122 break;
123 }
124 return nodes_online(nodes);
125}
126
127
128static int get_nodes(unsigned long *nodes, unsigned long __user *nmask,
129 unsigned long maxnode, int mode)
130{
131 unsigned long k;
132 unsigned long nlongs;
133 unsigned long endmask;
134
135 --maxnode;
136 bitmap_zero(nodes, MAX_NUMNODES);
137 if (maxnode == 0 || !nmask)
138 return 0;
139
140 nlongs = BITS_TO_LONGS(maxnode);
141 if (nlongs == 0)
142 return -EINVAL;
143
144 if ((maxnode % BITS_PER_LONG) == 0)
145 endmask = ~0UL;
146 else
147 endmask = (1UL << (maxnode % BITS_PER_LONG)) - 1;
148
149
150
151 if (nlongs > BITS_TO_LONGS(MAX_NUMNODES)) {
152 if (nlongs > PAGE_SIZE/sizeof(long))
153 return -EINVAL;
154 for (k = BITS_TO_LONGS(MAX_NUMNODES); k < nlongs; k++) {
155 unsigned long t;
156 if (get_user(t, nmask + k))
157 return -EFAULT;
158 if (k == nlongs - 1) {
159 if (t & endmask)
160 return -EINVAL;
161 } else if (t)
162 return -EINVAL;
163 }
164 nlongs = BITS_TO_LONGS(MAX_NUMNODES);
165 endmask = ~0UL;
166 }
167
168 if (copy_from_user(nodes, nmask, nlongs*sizeof(unsigned long)))
169 return -EFAULT;
170 nodes[nlongs-1] &= endmask;
171 return 0;
172}
173
174
175static struct zonelist *bind_zonelist(unsigned long *nodes)
176{
177 struct zonelist *zl;
178 int num, max, nd;
179
180 max = 1 + MAX_NR_ZONES * bitmap_weight(nodes, MAX_NUMNODES);
181 zl = kmalloc(sizeof(void *) * max, GFP_KERNEL);
182 if (!zl)
183 return NULL;
184 num = 0;
185 for (nd = find_first_bit(nodes, MAX_NUMNODES);
186 nd < MAX_NUMNODES;
187 nd = find_next_bit(nodes, MAX_NUMNODES, 1+nd)) {
188 int k;
189 for (k = MAX_NR_ZONES-1; k >= 0; k--) {
190 struct zone *z = &NODE_DATA(nd)->node_zones[k];
191 if (!z->present_pages)
192 continue;
193 zl->zones[num++] = z;
194 if (k > policy_zone)
195 policy_zone = k;
196 }
197 }
198 BUG_ON(num >= max);
199 zl->zones[num] = NULL;
200 return zl;
201}
202
203
204static struct mempolicy *mpol_new(int mode, unsigned long *nodes)
205{
206 struct mempolicy *policy;
207
208 PDprintk("setting mode %d nodes[0] %lx\n", mode, nodes[0]);
209 if (mode == MPOL_DEFAULT)
210 return NULL;
211 policy = kmem_cache_alloc(policy_cache, GFP_KERNEL);
212 if (!policy)
213 return ERR_PTR(-ENOMEM);
214 atomic_set(&policy->refcnt, 1);
215 switch (mode) {
216 case MPOL_INTERLEAVE:
217 bitmap_copy(policy->v.nodes, nodes, MAX_NUMNODES);
218 if (bitmap_weight(nodes, MAX_NUMNODES) == 0) {
219 kmem_cache_free(policy_cache, policy);
220 return ERR_PTR(-EINVAL);
221 }
222 break;
223 case MPOL_PREFERRED:
224 policy->v.preferred_node = find_first_bit(nodes, MAX_NUMNODES);
225 if (policy->v.preferred_node >= MAX_NUMNODES)
226 policy->v.preferred_node = -1;
227 break;
228 case MPOL_BIND:
229 policy->v.zonelist = bind_zonelist(nodes);
230 if (policy->v.zonelist == NULL) {
231 kmem_cache_free(policy_cache, policy);
232 return ERR_PTR(-ENOMEM);
233 }
234 break;
235 }
236 policy->policy = mode;
237 return policy;
238}
239
240
241static int
242verify_pages(unsigned long addr, unsigned long end, unsigned long *nodes)
243{
244 while (addr < end) {
245 struct page *p;
246 pte_t *pte;
247 pmd_t *pmd;
248 pgd_t *pgd = pgd_offset_k(addr);
249 if (pgd_none(*pgd)) {
250 addr = (addr + PGDIR_SIZE) & PGDIR_MASK;
251 continue;
252 }
253 pmd = pmd_offset(pgd, addr);
254 if (pmd_none(*pmd)) {
255 addr = (addr + PMD_SIZE) & PMD_MASK;
256 continue;
257 }
258 p = NULL;
259 pte = pte_offset_map(pmd, addr);
260 if (pte_present(*pte))
261 p = pte_page(*pte);
262 pte_unmap(pte);
263 if (p) {
264 unsigned nid = page_to_nid(p);
265 if (!test_bit(nid, nodes))
266 return -EIO;
267 }
268 addr += PAGE_SIZE;
269 }
270 return 0;
271}
272
273
274static struct vm_area_struct *
275check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
276 unsigned long *nodes, unsigned long flags)
277{
278 int err;
279 struct vm_area_struct *first, *vma, *prev;
280
281 first = find_vma(mm, start);
282 if (!first)
283 return ERR_PTR(-EFAULT);
284 prev = NULL;
285 for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) {
286 if (!vma->vm_next && vma->vm_end < end)
287 return ERR_PTR(-EFAULT);
288 if (prev && prev->vm_end < vma->vm_start)
289 return ERR_PTR(-EFAULT);
290 if ((flags & MPOL_MF_STRICT) && !is_vm_hugetlb_page(vma)) {
291 err = verify_pages(vma->vm_start, vma->vm_end, nodes);
292 if (err) {
293 first = ERR_PTR(err);
294 break;
295 }
296 }
297 prev = vma;
298 }
299 return first;
300}
301
302
303static int policy_vma(struct vm_area_struct *vma, struct mempolicy *new)
304{
305 int err = 0;
306 struct mempolicy *old = vma->vm_policy;
307
308 PDprintk("vma %lx-%lx/%lx vm_ops %p vm_file %p set_policy %p\n",
309 vma->vm_start, vma->vm_end, vma->vm_pgoff,
310 vma->vm_ops, vma->vm_file,
311 vma->vm_ops ? vma->vm_ops->set_policy : NULL);
312
313 if (vma->vm_ops && vma->vm_ops->set_policy)
314 err = vma->vm_ops->set_policy(vma, new);
315 if (!err) {
316 mpol_get(new);
317 vma->vm_policy = new;
318 mpol_free(old);
319 }
320 return err;
321}
322
323
324static int mbind_range(struct vm_area_struct *vma, unsigned long start,
325 unsigned long end, struct mempolicy *new)
326{
327 struct vm_area_struct *next;
328 int err;
329
330 err = 0;
331 for (; vma && vma->vm_start < end; vma = next) {
332 next = vma->vm_next;
333 if (vma->vm_start < start)
334 err = split_vma(vma->vm_mm, vma, start, 1);
335 if (!err && vma->vm_end > end)
336 err = split_vma(vma->vm_mm, vma, end, 0);
337 if (!err)
338 err = policy_vma(vma, new);
339 if (err)
340 break;
341 }
342 return err;
343}
344
345
346asmlinkage long sys_mbind(unsigned long start, unsigned long len,
347 unsigned long mode,
348 unsigned long __user *nmask, unsigned long maxnode,
349 unsigned flags)
350{
351 struct vm_area_struct *vma;
352 struct mm_struct *mm = current->mm;
353 struct mempolicy *new;
354 unsigned long end;
355 DECLARE_BITMAP(nodes, MAX_NUMNODES);
356 int err;
357
358 if ((flags & ~(unsigned long)(MPOL_MF_STRICT)) || mode > MPOL_MAX)
359 return -EINVAL;
360 if (start & ~PAGE_MASK)
361 return -EINVAL;
362 if (mode == MPOL_DEFAULT)
363 flags &= ~MPOL_MF_STRICT;
364 len = (len + PAGE_SIZE - 1) & PAGE_MASK;
365 end = start + len;
366 if (end < start)
367 return -EINVAL;
368 if (end == start)
369 return 0;
370
371 err = get_nodes(nodes, nmask, maxnode, mode);
372 if (err)
373 return err;
374
375 if (mpol_check_policy(mode, nodes))
376 return -EINVAL;
377
378 new = mpol_new(mode, nodes);
379 if (IS_ERR(new))
380 return PTR_ERR(new);
381
382 PDprintk("mbind %lx-%lx mode:%ld nodes:%lx\n",start,start+len,
383 mode,nodes[0]);
384
385 down_write(&mm->mmap_sem);
386 vma = check_range(mm, start, end, nodes, flags);
387 err = PTR_ERR(vma);
388 if (!IS_ERR(vma))
389 err = mbind_range(vma, start, end, new);
390 up_write(&mm->mmap_sem);
391 mpol_free(new);
392 return err;
393}
394
395
396asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask,
397 unsigned long maxnode)
398{
399 int err;
400 struct mempolicy *new;
401 DECLARE_BITMAP(nodes, MAX_NUMNODES);
402
403 if (mode < 0 || mode > MPOL_MAX)
404 return -EINVAL;
405 err = get_nodes(nodes, nmask, maxnode, mode);
406 if (err)
407 return err;
408 if (mpol_check_policy(mode, nodes))
409 return -EINVAL;
410 new = mpol_new(mode, nodes);
411 if (IS_ERR(new))
412 return PTR_ERR(new);
413 mpol_free(current->mempolicy);
414 current->mempolicy = new;
415 if (new && new->policy == MPOL_INTERLEAVE)
416 current->il_next = find_first_bit(new->v.nodes, MAX_NUMNODES);
417 return 0;
418}
419
420
421static void get_zonemask(struct mempolicy *p, unsigned long *nodes)
422{
423 int i;
424
425 bitmap_zero(nodes, MAX_NUMNODES);
426 switch (p->policy) {
427 case MPOL_BIND:
428 for (i = 0; p->v.zonelist->zones[i]; i++)
429 __set_bit(p->v.zonelist->zones[i]->zone_pgdat->node_id, nodes);
430 break;
431 case MPOL_DEFAULT:
432 break;
433 case MPOL_INTERLEAVE:
434 bitmap_copy(nodes, p->v.nodes, MAX_NUMNODES);
435 break;
436 case MPOL_PREFERRED:
437
438 if (p->v.preferred_node < 0)
439 bitmap_copy(nodes, node_online_map, MAX_NUMNODES);
440 else
441 __set_bit(p->v.preferred_node, nodes);
442 break;
443 default:
444 BUG();
445 }
446}
447
448static int lookup_node(struct mm_struct *mm, unsigned long addr)
449{
450 struct page *p;
451 int err;
452
453 err = get_user_pages(current, mm, addr & PAGE_MASK, 1, 0, 0, &p, NULL);
454 if (err >= 0) {
455 err = page_to_nid(p);
456 put_page(p);
457 }
458 return err;
459}
460
461
462static int copy_nodes_to_user(unsigned long __user *mask, unsigned long maxnode,
463 void *nodes, unsigned nbytes)
464{
465 unsigned long copy = ALIGN(maxnode-1, 64) / 8;
466
467 if (copy > nbytes) {
468 if (copy > PAGE_SIZE)
469 return -EINVAL;
470 if (clear_user((char __user *)mask + nbytes, copy - nbytes))
471 return -EFAULT;
472 copy = nbytes;
473 }
474 return copy_to_user(mask, nodes, copy) ? -EFAULT : 0;
475}
476
477
478asmlinkage long sys_get_mempolicy(int __user *policy,
479 unsigned long __user *nmask,
480 unsigned long maxnode,
481 unsigned long addr, unsigned long flags)
482{
483 int err, pval;
484 struct mm_struct *mm = current->mm;
485 struct vm_area_struct *vma = NULL;
486 struct mempolicy *pol = current->mempolicy;
487
488 if (flags & ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR))
489 return -EINVAL;
490 if (nmask != NULL && maxnode < numnodes)
491 return -EINVAL;
492 if (flags & MPOL_F_ADDR) {
493 down_read(&mm->mmap_sem);
494 vma = find_vma_intersection(mm, addr, addr+1);
495 if (!vma) {
496 up_read(&mm->mmap_sem);
497 return -EFAULT;
498 }
499 if (vma->vm_ops && vma->vm_ops->get_policy)
500 pol = vma->vm_ops->get_policy(vma, addr);
501 else
502 pol = vma->vm_policy;
503 } else if (addr)
504 return -EINVAL;
505
506 if (!pol)
507 pol = &default_policy;
508
509 if (flags & MPOL_F_NODE) {
510 if (flags & MPOL_F_ADDR) {
511 err = lookup_node(mm, addr);
512 if (err < 0)
513 goto out;
514 pval = err;
515 } else if (pol == current->mempolicy &&
516 pol->policy == MPOL_INTERLEAVE) {
517 pval = current->il_next;
518 } else {
519 err = -EINVAL;
520 goto out;
521 }
522 } else
523 pval = pol->policy;
524
525 if (vma) {
526 up_read(¤t->mm->mmap_sem);
527 vma = NULL;
528 }
529
530 if (policy && put_user(pval, policy))
531 return -EFAULT;
532
533 err = 0;
534 if (nmask) {
535 DECLARE_BITMAP(nodes, MAX_NUMNODES);
536 get_zonemask(pol, nodes);
537 err = copy_nodes_to_user(nmask, maxnode, nodes, sizeof(nodes));
538 }
539
540 out:
541 if (vma)
542 up_read(¤t->mm->mmap_sem);
543 return err;
544}
545
546#ifdef CONFIG_COMPAT
547
548asmlinkage long compat_get_mempolicy(int __user *policy,
549 compat_ulong_t __user *nmask,
550 compat_ulong_t maxnode,
551 compat_ulong_t addr, compat_ulong_t flags)
552{
553 long err;
554 unsigned long __user *nm = NULL;
555 unsigned long nr_bits, alloc_size;
556 DECLARE_BITMAP(bm, MAX_NUMNODES);
557
558 nr_bits = min_t(unsigned long, maxnode-1, MAX_NUMNODES);
559 alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8;
560
561 if (nmask)
562 nm = compat_alloc_user_space(alloc_size);
563
564 err = sys_get_mempolicy(policy, nm, nr_bits+1, addr, flags);
565
566 if (!err && nmask) {
567 err = copy_from_user(bm, nm, alloc_size);
568
569 err |= clear_user(nmask, ALIGN(maxnode-1, 8) / 8);
570 err |= compat_put_bitmap(nmask, bm, nr_bits);
571 }
572
573 return err;
574}
575
576asmlinkage long compat_set_mempolicy(int mode, compat_ulong_t __user *nmask,
577 compat_ulong_t maxnode)
578{
579 long err = 0;
580 unsigned long __user *nm = NULL;
581 unsigned long nr_bits, alloc_size;
582 DECLARE_BITMAP(bm, MAX_NUMNODES);
583
584 nr_bits = min_t(unsigned long, maxnode-1, MAX_NUMNODES);
585 alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8;
586
587 if (nmask) {
588 err = compat_get_bitmap(bm, nmask, nr_bits);
589 nm = compat_alloc_user_space(alloc_size);
590 err |= copy_to_user(nm, bm, alloc_size);
591 }
592
593 if (err)
594 return -EFAULT;
595
596 return sys_set_mempolicy(mode, nm, nr_bits+1);
597}
598
599asmlinkage long compat_mbind(compat_ulong_t start, compat_ulong_t len,
600 compat_ulong_t mode, compat_ulong_t __user *nmask,
601 compat_ulong_t maxnode, compat_ulong_t flags)
602{
603 long err = 0;
604 unsigned long __user *nm = NULL;
605 unsigned long nr_bits, alloc_size;
606 DECLARE_BITMAP(bm, MAX_NUMNODES);
607
608 nr_bits = min_t(unsigned long, maxnode-1, MAX_NUMNODES);
609 alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8;
610
611 if (nmask) {
612 err = compat_get_bitmap(bm, nmask, nr_bits);
613 nm = compat_alloc_user_space(alloc_size);
614 err |= copy_to_user(nm, bm, alloc_size);
615 }
616
617 if (err)
618 return -EFAULT;
619
620 return sys_mbind(start, len, mode, nm, nr_bits+1, flags);
621}
622
623#endif
624
625
626struct mempolicy *
627get_vma_policy(struct task_struct *task, struct vm_area_struct *vma, unsigned long addr)
628{
629 struct mempolicy *pol = task->mempolicy;
630
631 if (vma) {
632 if (vma->vm_ops && vma->vm_ops->get_policy)
633 pol = vma->vm_ops->get_policy(vma, addr);
634 else if (vma->vm_policy &&
635 vma->vm_policy->policy != MPOL_DEFAULT)
636 pol = vma->vm_policy;
637 }
638 if (!pol)
639 pol = &default_policy;
640 return pol;
641}
642
643
644static struct zonelist *zonelist_policy(unsigned gfp, struct mempolicy *policy)
645{
646 int nd;
647
648 switch (policy->policy) {
649 case MPOL_PREFERRED:
650 nd = policy->v.preferred_node;
651 if (nd < 0)
652 nd = numa_node_id();
653 break;
654 case MPOL_BIND:
655
656 if (gfp >= policy_zone)
657 return policy->v.zonelist;
658
659 case MPOL_INTERLEAVE:
660 case MPOL_DEFAULT:
661 nd = numa_node_id();
662 break;
663 default:
664 nd = 0;
665 BUG();
666 }
667 return NODE_DATA(nd)->node_zonelists + (gfp & GFP_ZONEMASK);
668}
669
670
671static unsigned interleave_nodes(struct mempolicy *policy)
672{
673 unsigned nid, next;
674 struct task_struct *me = current;
675
676 nid = me->il_next;
677 BUG_ON(nid >= MAX_NUMNODES);
678 next = find_next_bit(policy->v.nodes, MAX_NUMNODES, 1+nid);
679 if (next >= MAX_NUMNODES)
680 next = find_first_bit(policy->v.nodes, MAX_NUMNODES);
681 me->il_next = next;
682 return nid;
683}
684
685
686static unsigned offset_il_node(struct mempolicy *pol,
687 struct vm_area_struct *vma, unsigned long off)
688{
689 unsigned nnodes = bitmap_weight(pol->v.nodes, MAX_NUMNODES);
690 unsigned target = (unsigned)off % nnodes;
691 int c;
692 int nid = -1;
693
694 c = 0;
695 do {
696 nid = find_next_bit(pol->v.nodes, MAX_NUMNODES, nid+1);
697 c++;
698 } while (c <= target);
699 BUG_ON(nid >= MAX_NUMNODES);
700 BUG_ON(!test_bit(nid, pol->v.nodes));
701 return nid;
702}
703
704
705
706static struct page *alloc_page_interleave(unsigned gfp, unsigned order, unsigned nid)
707{
708 struct zonelist *zl;
709 struct page *page;
710
711 BUG_ON(!test_bit(nid, node_online_map));
712 zl = NODE_DATA(nid)->node_zonelists + (gfp & GFP_ZONEMASK);
713 page = __alloc_pages(gfp, order, zl);
714 if (page && page_zone(page) == zl->zones[0]) {
715 zl->zones[0]->pageset[get_cpu()].interleave_hit++;
716 put_cpu();
717 }
718 return page;
719}
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743struct page *
744alloc_page_vma(unsigned gfp, struct vm_area_struct *vma, unsigned long addr)
745{
746 struct mempolicy *pol = get_vma_policy(current, vma, addr);
747
748 if (unlikely(pol->policy == MPOL_INTERLEAVE)) {
749 unsigned nid;
750 if (vma) {
751 unsigned long off;
752 BUG_ON(addr >= vma->vm_end);
753 BUG_ON(addr < vma->vm_start);
754 off = vma->vm_pgoff;
755 off += (addr - vma->vm_start) >> PAGE_SHIFT;
756 nid = offset_il_node(pol, vma, off);
757 } else {
758
759 nid = interleave_nodes(pol);
760 }
761 return alloc_page_interleave(gfp, 0, nid);
762 }
763 return __alloc_pages(gfp, 0, zonelist_policy(gfp, pol));
764}
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781struct page *alloc_pages_current(unsigned gfp, unsigned order)
782{
783 struct mempolicy *pol = current->mempolicy;
784
785 if (!pol || in_interrupt())
786 pol = &default_policy;
787 if (pol->policy == MPOL_INTERLEAVE)
788 return alloc_page_interleave(gfp, order, interleave_nodes(pol));
789 return __alloc_pages(gfp, order, zonelist_policy(gfp, pol));
790}
791EXPORT_SYMBOL(alloc_pages_current);
792
793
794struct mempolicy *__mpol_copy(struct mempolicy *old)
795{
796 struct mempolicy *new = kmem_cache_alloc(policy_cache, GFP_KERNEL);
797
798 if (!new)
799 return ERR_PTR(-ENOMEM);
800 *new = *old;
801 atomic_set(&new->refcnt, 1);
802 if (new->policy == MPOL_BIND) {
803 int sz = ksize(old->v.zonelist);
804 new->v.zonelist = kmalloc(sz, SLAB_KERNEL);
805 if (!new->v.zonelist) {
806 kmem_cache_free(policy_cache, new);
807 return ERR_PTR(-ENOMEM);
808 }
809 memcpy(new->v.zonelist, old->v.zonelist, sz);
810 }
811 return new;
812}
813
814
815int __mpol_equal(struct mempolicy *a, struct mempolicy *b)
816{
817 if (!a || !b)
818 return 0;
819 if (a->policy != b->policy)
820 return 0;
821 switch (a->policy) {
822 case MPOL_DEFAULT:
823 return 1;
824 case MPOL_INTERLEAVE:
825 return bitmap_equal(a->v.nodes, b->v.nodes, MAX_NUMNODES);
826 case MPOL_PREFERRED:
827 return a->v.preferred_node == b->v.preferred_node;
828 case MPOL_BIND: {
829 int i;
830 for (i = 0; a->v.zonelist->zones[i]; i++)
831 if (a->v.zonelist->zones[i] != b->v.zonelist->zones[i])
832 return 0;
833 return b->v.zonelist->zones[i] == NULL;
834 }
835 default:
836 BUG();
837 return 0;
838 }
839}
840
841
842void __mpol_free(struct mempolicy *p)
843{
844 if (!atomic_dec_and_test(&p->refcnt))
845 return;
846 if (p->policy == MPOL_BIND)
847 kfree(p->v.zonelist);
848 p->policy = MPOL_DEFAULT;
849 kmem_cache_free(policy_cache, p);
850}
851
852
853
854
855
856
857
858int mpol_first_node(struct vm_area_struct *vma, unsigned long addr)
859{
860 struct mempolicy *pol = get_vma_policy(current, vma, addr);
861
862 switch (pol->policy) {
863 case MPOL_DEFAULT:
864 return numa_node_id();
865 case MPOL_BIND:
866 return pol->v.zonelist->zones[0]->zone_pgdat->node_id;
867 case MPOL_INTERLEAVE:
868 return interleave_nodes(pol);
869 case MPOL_PREFERRED:
870 return pol->v.preferred_node >= 0 ?
871 pol->v.preferred_node : numa_node_id();
872 }
873 BUG();
874 return 0;
875}
876
877
878int mpol_node_valid(int nid, struct vm_area_struct *vma, unsigned long addr)
879{
880 struct mempolicy *pol = get_vma_policy(current, vma, addr);
881
882 switch (pol->policy) {
883 case MPOL_PREFERRED:
884 case MPOL_DEFAULT:
885 case MPOL_INTERLEAVE:
886 return 1;
887 case MPOL_BIND: {
888 struct zone **z;
889 for (z = pol->v.zonelist->zones; *z; z++)
890 if ((*z)->zone_pgdat->node_id == nid)
891 return 1;
892 return 0;
893 }
894 default:
895 BUG();
896 return 0;
897 }
898}
899
900
901
902
903
904
905
906
907
908
909
910
911static struct sp_node *
912sp_lookup(struct shared_policy *sp, unsigned long start, unsigned long end)
913{
914 struct rb_node *n = sp->root.rb_node;
915
916 while (n) {
917 struct sp_node *p = rb_entry(n, struct sp_node, nd);
918 if (start >= p->end) {
919 n = n->rb_right;
920 } else if (end < p->start) {
921 n = n->rb_left;
922 } else {
923 break;
924 }
925 }
926 if (!n)
927 return NULL;
928 for (;;) {
929 struct sp_node *w = NULL;
930 struct rb_node *prev = rb_prev(n);
931 if (!prev)
932 break;
933 w = rb_entry(prev, struct sp_node, nd);
934 if (w->end <= start)
935 break;
936 n = prev;
937 }
938 return rb_entry(n, struct sp_node, nd);
939}
940
941
942
943static void sp_insert(struct shared_policy *sp, struct sp_node *new)
944{
945 struct rb_node **p = &sp->root.rb_node;
946 struct rb_node *parent = NULL;
947 struct sp_node *nd;
948
949 while (*p) {
950 parent = *p;
951 nd = rb_entry(parent, struct sp_node, nd);
952 if (new->start < nd->start)
953 p = &(*p)->rb_left;
954 else if (new->end > nd->end)
955 p = &(*p)->rb_right;
956 else
957 BUG();
958 }
959 rb_link_node(&new->nd, parent, p);
960 rb_insert_color(&new->nd, &sp->root);
961 PDprintk("inserting %lx-%lx: %d\n", new->start, new->end,
962 new->policy ? new->policy->policy : 0);
963}
964
965
966struct mempolicy *
967mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx)
968{
969 struct mempolicy *pol = NULL;
970 struct sp_node *sn;
971
972 down(&sp->sem);
973 sn = sp_lookup(sp, idx, idx+1);
974 if (sn) {
975 mpol_get(sn->policy);
976 pol = sn->policy;
977 }
978 up(&sp->sem);
979 return pol;
980}
981
982static void sp_delete(struct shared_policy *sp, struct sp_node *n)
983{
984 PDprintk("deleting %lx-l%x\n", n->start, n->end);
985 rb_erase(&n->nd, &sp->root);
986 mpol_free(n->policy);
987 kmem_cache_free(sn_cache, n);
988}
989
990struct sp_node *
991sp_alloc(unsigned long start, unsigned long end, struct mempolicy *pol)
992{
993 struct sp_node *n = kmem_cache_alloc(sn_cache, GFP_KERNEL);
994
995 if (!n)
996 return NULL;
997 n->start = start;
998 n->end = end;
999 mpol_get(pol);
1000 n->policy = pol;
1001 return n;
1002}
1003
1004
1005static int shared_policy_replace(struct shared_policy *sp, unsigned long start,
1006 unsigned long end, struct sp_node *new)
1007{
1008 struct sp_node *n, *new2;
1009
1010 down(&sp->sem);
1011 n = sp_lookup(sp, start, end);
1012
1013 while (n && n->start < end) {
1014 struct rb_node *next = rb_next(&n->nd);
1015 if (n->start >= start) {
1016 if (n->end <= end)
1017 sp_delete(sp, n);
1018 else
1019 n->start = end;
1020 } else {
1021
1022 if (n->end > end) {
1023 new2 = sp_alloc(end, n->end, n->policy);
1024 if (!new2) {
1025 up(&sp->sem);
1026 return -ENOMEM;
1027 }
1028 n->end = end;
1029 sp_insert(sp, new2);
1030 }
1031
1032 if (n->start < start && n->end > start)
1033 n->end = start;
1034 }
1035 if (!next)
1036 break;
1037 n = rb_entry(next, struct sp_node, nd);
1038 }
1039 if (new)
1040 sp_insert(sp, new);
1041 up(&sp->sem);
1042 return 0;
1043}
1044
1045int mpol_set_shared_policy(struct shared_policy *info,
1046 struct vm_area_struct *vma, struct mempolicy *npol)
1047{
1048 int err;
1049 struct sp_node *new = NULL;
1050 unsigned long sz = vma_pages(vma);
1051
1052 PDprintk("set_shared_policy %lx sz %lu %d %lx\n",
1053 vma->vm_pgoff,
1054 sz, npol? npol->policy : -1,
1055 npol ? npol->v.nodes[0] : -1);
1056
1057 if (npol) {
1058 new = sp_alloc(vma->vm_pgoff, vma->vm_pgoff + sz, npol);
1059 if (!new)
1060 return -ENOMEM;
1061 }
1062 err = shared_policy_replace(info, vma->vm_pgoff, vma->vm_pgoff+sz, new);
1063 if (err && new)
1064 kmem_cache_free(sn_cache, new);
1065 return err;
1066}
1067
1068
1069void mpol_free_shared_policy(struct shared_policy *p)
1070{
1071 struct sp_node *n;
1072 struct rb_node *next;
1073
1074 down(&p->sem);
1075 next = rb_first(&p->root);
1076 while (next) {
1077 n = rb_entry(next, struct sp_node, nd);
1078 next = rb_next(&n->nd);
1079 rb_erase(&n->nd, &p->root);
1080 mpol_free(n->policy);
1081 kmem_cache_free(sn_cache, n);
1082 }
1083 up(&p->sem);
1084}
1085
1086
1087void __init numa_policy_init(void)
1088{
1089 policy_cache = kmem_cache_create("numa_policy",
1090 sizeof(struct mempolicy),
1091 0, SLAB_PANIC, NULL, NULL);
1092
1093 sn_cache = kmem_cache_create("shared_policy_node",
1094 sizeof(struct sp_node),
1095 0, SLAB_PANIC, NULL, NULL);
1096
1097
1098
1099
1100 if (sys_set_mempolicy(MPOL_INTERLEAVE, node_online_map, MAX_NUMNODES) < 0)
1101 printk("numa_policy_init: interleaving failed\n");
1102}
1103
1104
1105
1106void numa_default_policy(void)
1107{
1108 sys_set_mempolicy(MPOL_DEFAULT, NULL, 0);
1109}
1110