1
2
3
4
5
6
7
8#include <linux/config.h>
9#include <linux/mm.h>
10#include <linux/hugetlb.h>
11#include <linux/mman.h>
12#include <linux/slab.h>
13#include <linux/kernel_stat.h>
14#include <linux/swap.h>
15#include <linux/vmalloc.h>
16#include <linux/pagemap.h>
17#include <linux/namei.h>
18#include <linux/shm.h>
19#include <linux/blkdev.h>
20#include <linux/writeback.h>
21#include <linux/proc_fs.h>
22#include <linux/seq_file.h>
23#include <linux/init.h>
24#include <linux/module.h>
25#include <linux/rmap.h>
26#include <linux/security.h>
27#include <linux/backing-dev.h>
28
29#include <asm/pgtable.h>
30#include <asm/tlbflush.h>
31#include <linux/swapops.h>
32
/*
 * Spinlock defined for the swap list.
 * NOTE(review): presumably this is what swap_list_lock()/swap_list_unlock()
 * operate on - confirm in <linux/swap.h>.
 */
spinlock_t swaplock = SPIN_LOCK_UNLOCKED;
/* One more than the highest swap_info[] slot ever claimed by sys_swapon(). */
unsigned int nr_swapfiles;
/* Page total over enabled areas; adjusted in sys_swapon()/sys_swapoff(). */
long total_swap_pages;
/*
 * Bumped by swap_duplicate() each time a map count saturates (limits the
 * number of warnings printed); cleared again by try_to_unuse().
 */
static int swap_overflow;

EXPORT_SYMBOL(total_swap_pages);

/* Message fragments for the "swap_free:" / "swap_dup:" diagnostics. */
static const char Bad_file[] = "Bad swap file entry ";
static const char Unused_file[] = "Unused swap file entry ";
static const char Bad_offset[] = "Bad swap offset entry ";
static const char Unused_offset[] = "Unused swap offset entry ";

/*
 * List of swap areas linked through swap_info[].next.  Both fields start
 * at -1, the value this file treats as "no entry".
 */
struct swap_list_t swap_list = {-1, -1};

/* Per-area state, indexed by swap type. */
struct swap_info_struct swap_info[MAX_SWAPFILES];

/*
 * Held while an area is switched on or torn down and for the whole of a
 * /proc/swaps traversal (swap_start() .. swap_stop()).
 */
static DECLARE_MUTEX(swapon_sem);

/*
 * Read-held by swap_unplug_io_fn() while it dereferences an area's bdev;
 * sys_swapoff() takes and drops it for write after try_to_unuse() so that
 * any such reader has finished before the area is dismantled.
 */
static DECLARE_RWSEM(swap_unplug_sem);

/* Number of slots scan_swap_map() tries to allocate as one run. */
#define SWAPFILE_CLUSTER 256
59
60void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page)
61{
62 swp_entry_t entry;
63
64 down_read(&swap_unplug_sem);
65 entry.val = page->private;
66 if (PageSwapCache(page)) {
67 struct block_device *bdev = swap_info[swp_type(entry)].bdev;
68 struct backing_dev_info *bdi;
69
70
71
72
73
74
75
76
77
78 WARN_ON(page_count(page) <= 1);
79
80 bdi = bdev->bd_inode->i_mapping->backing_dev_info;
81 bdi->unplug_io_fn(bdi, page);
82 }
83 up_read(&swap_unplug_sem);
84}
85
/*
 * Find a free slot in @si, mark it in use (map count 1) and return its
 * offset.  Returns 0 when nothing free is found between lowest_bit and
 * highest_bit.  get_swap_page() calls this with the device lock held.
 *
 * Search order:
 *   1. continue in the current cluster while cluster_nr is non-zero;
 *   2. otherwise look for SWAPFILE_CLUSTER consecutive free slots;
 *   3. otherwise take the first free slot at or after lowest_bit.
 */
static inline int scan_swap_map(struct swap_info_struct *si)
{
	unsigned long offset;

	/* 1. Use up the remainder of the cluster started earlier. */
	if (si->cluster_nr) {
		while (si->cluster_next <= si->highest_bit) {
			offset = si->cluster_next++;
			if (si->swap_map[offset])
				continue;
			si->cluster_nr--;
			goto got_page;
		}
	}
	si->cluster_nr = SWAPFILE_CLUSTER;

	/* 2. Look for a completely free run (need not be aligned). */
	offset = si->lowest_bit;
 check_next_cluster:
	if (offset+SWAPFILE_CLUSTER-1 <= si->highest_bit)
	{
		unsigned long nr;
		for (nr = offset; nr < offset+SWAPFILE_CLUSTER; nr++)
			if (si->swap_map[nr])
			{
				/* Busy slot: restart the run just past it. */
				offset = nr+1;
				goto check_next_cluster;
			}
		/* Whole run is free: start allocating from its first slot. */
		goto got_page;
	}

	/* 3. No free run: fall back to a slot-by-slot scan. */
	for (offset = si->lowest_bit; offset <= si->highest_bit ; offset++) {
		if (si->swap_map[offset])
			continue;
		si->lowest_bit = offset+1;
	got_page:
		/* Shrink the [lowest_bit, highest_bit] window if we took an end. */
		if (offset == si->lowest_bit)
			si->lowest_bit++;
		if (offset == si->highest_bit)
			si->highest_bit--;
		/* Window became empty: store the "nothing free" sentinel pair. */
		if (si->lowest_bit > si->highest_bit) {
			si->lowest_bit = si->max;
			si->highest_bit = 0;
		}
		si->swap_map[offset] = 1;
		si->inuse_pages++;
		nr_swap_pages--;
		si->cluster_next = offset+1;
		return offset;
	}
	/* Nothing free at all. */
	si->lowest_bit = si->max;
	si->highest_bit = 0;
	return 0;
}
149
150swp_entry_t get_swap_page(void)
151{
152 struct swap_info_struct * p;
153 unsigned long offset;
154 swp_entry_t entry;
155 int type, wrapped = 0;
156
157 entry.val = 0;
158 swap_list_lock();
159 type = swap_list.next;
160 if (type < 0)
161 goto out;
162 if (nr_swap_pages <= 0)
163 goto out;
164
165 while (1) {
166 p = &swap_info[type];
167 if ((p->flags & SWP_ACTIVE) == SWP_ACTIVE) {
168 swap_device_lock(p);
169 offset = scan_swap_map(p);
170 swap_device_unlock(p);
171 if (offset) {
172 entry = swp_entry(type,offset);
173 type = swap_info[type].next;
174 if (type < 0 ||
175 p->prio != swap_info[type].prio) {
176 swap_list.next = swap_list.head;
177 } else {
178 swap_list.next = type;
179 }
180 goto out;
181 }
182 }
183 type = p->next;
184 if (!wrapped) {
185 if (type < 0 || p->prio != swap_info[type].prio) {
186 type = swap_list.head;
187 wrapped = 1;
188 }
189 } else
190 if (type < 0)
191 goto out;
192 }
193out:
194 swap_list_unlock();
195 return entry;
196}
197
198static struct swap_info_struct * swap_info_get(swp_entry_t entry)
199{
200 struct swap_info_struct * p;
201 unsigned long offset, type;
202
203 if (!entry.val)
204 goto out;
205 type = swp_type(entry);
206 if (type >= nr_swapfiles)
207 goto bad_nofile;
208 p = & swap_info[type];
209 if (!(p->flags & SWP_USED))
210 goto bad_device;
211 offset = swp_offset(entry);
212 if (offset >= p->max)
213 goto bad_offset;
214 if (!p->swap_map[offset])
215 goto bad_free;
216 swap_list_lock();
217 if (p->prio > swap_info[swap_list.next].prio)
218 swap_list.next = type;
219 swap_device_lock(p);
220 return p;
221
222bad_free:
223 printk(KERN_ERR "swap_free: %s%08lx\n", Unused_offset, entry.val);
224 goto out;
225bad_offset:
226 printk(KERN_ERR "swap_free: %s%08lx\n", Bad_offset, entry.val);
227 goto out;
228bad_device:
229 printk(KERN_ERR "swap_free: %s%08lx\n", Unused_file, entry.val);
230 goto out;
231bad_nofile:
232 printk(KERN_ERR "swap_free: %s%08lx\n", Bad_file, entry.val);
233out:
234 return NULL;
235}
236
/*
 * Drop the device lock of @si and then the swap list lock, i.e. the
 * locks a successful swap_info_get() returns with.
 */
static void swap_info_put(struct swap_info_struct *si)
{
	swap_device_unlock(si);
	swap_list_unlock();
}
242
243static int swap_entry_free(struct swap_info_struct *p, unsigned long offset)
244{
245 int count = p->swap_map[offset];
246
247 if (count < SWAP_MAP_MAX) {
248 count--;
249 p->swap_map[offset] = count;
250 if (!count) {
251 if (offset < p->lowest_bit)
252 p->lowest_bit = offset;
253 if (offset > p->highest_bit)
254 p->highest_bit = offset;
255 nr_swap_pages++;
256 p->inuse_pages--;
257 }
258 }
259 return count;
260}
261
262
263
264
265
266void swap_free(swp_entry_t entry)
267{
268 struct swap_info_struct * p;
269
270 p = swap_info_get(entry);
271 if (p) {
272 swap_entry_free(p, swp_offset(entry));
273 swap_info_put(p);
274 }
275}
276
277
278
279
280
281static int exclusive_swap_page(struct page *page)
282{
283 int retval = 0;
284 struct swap_info_struct * p;
285 swp_entry_t entry;
286
287 entry.val = page->private;
288 p = swap_info_get(entry);
289 if (p) {
290
291 if (p->swap_map[swp_offset(entry)] == 1) {
292
293 spin_lock_irq(&swapper_space.tree_lock);
294 if (page_count(page) == 2)
295 retval = 1;
296 spin_unlock_irq(&swapper_space.tree_lock);
297 }
298 swap_info_put(p);
299 }
300 return retval;
301}
302
303
304
305
306
307
308
309
310
/*
 * Decide from the page count whether the caller is the only user of
 * @page.  The page must be locked (BUG otherwise).
 *
 *   count 1: yes, unless the page is reserved.
 *   count 2: only for a swap-cache page that exclusive_swap_page() accepts.
 *   count 3: as for 2, but only if the page also has PagePrivate set.
 *   other  : no.
 *
 * Returns 1 for yes, 0 for no.
 */
int can_share_swap_page(struct page *page)
{
	int count;

	if (!PageLocked(page))
		BUG();

	count = page_count(page);
	if (count == 1)
		return PageReserved(page) ? 0 : 1;
	if (count != 2 && count != 3)
		return 0;
	if (count == 3 && !PagePrivate(page))
		return 0;
	if (!PageSwapCache(page))
		return 0;
	return exclusive_swap_page(page);
}
334
335
336
337
338
339int remove_exclusive_swap_page(struct page *page)
340{
341 int retval;
342 struct swap_info_struct * p;
343 swp_entry_t entry;
344
345 BUG_ON(PagePrivate(page));
346 BUG_ON(!PageLocked(page));
347
348 if (!PageSwapCache(page))
349 return 0;
350 if (PageWriteback(page))
351 return 0;
352 if (page_count(page) != 2)
353 return 0;
354
355 entry.val = page->private;
356 p = swap_info_get(entry);
357 if (!p)
358 return 0;
359
360
361 retval = 0;
362 if (p->swap_map[swp_offset(entry)] == 1) {
363
364 spin_lock_irq(&swapper_space.tree_lock);
365 if ((page_count(page) == 2) && !PageWriteback(page)) {
366 __delete_from_swap_cache(page);
367 SetPageDirty(page);
368 retval = 1;
369 }
370 spin_unlock_irq(&swapper_space.tree_lock);
371 }
372 swap_info_put(p);
373
374 if (retval) {
375 swap_free(entry);
376 page_cache_release(page);
377 }
378
379 return retval;
380}
381
382
383
384
385
386void free_swap_and_cache(swp_entry_t entry)
387{
388 struct swap_info_struct * p;
389 struct page *page = NULL;
390
391 p = swap_info_get(entry);
392 if (p) {
393 if (swap_entry_free(p, swp_offset(entry)) == 1) {
394 spin_lock_irq(&swapper_space.tree_lock);
395 page = radix_tree_lookup(&swapper_space.page_tree,
396 entry.val);
397 if (page && TestSetPageLocked(page))
398 page = NULL;
399 spin_unlock_irq(&swapper_space.tree_lock);
400 }
401 swap_info_put(p);
402 }
403 if (page) {
404 int one_user;
405
406 BUG_ON(PagePrivate(page));
407 page_cache_get(page);
408 one_user = (page_count(page) == 2);
409
410 if (!PageWriteback(page) && (one_user || vm_swap_full())) {
411 delete_from_swap_cache(page);
412 SetPageDirty(page);
413 }
414 unlock_page(page);
415 page_cache_release(page);
416 }
417}
418
419
420
421
422
423
424
425
426
427
428
429static void
430unuse_pte(struct vm_area_struct *vma, unsigned long address, pte_t *dir,
431 swp_entry_t entry, struct page *page)
432{
433 vma->vm_mm->rss++;
434 get_page(page);
435 set_pte(dir, pte_mkold(mk_pte(page, vma->vm_page_prot)));
436 page_add_anon_rmap(page, vma, address);
437 swap_free(entry);
438}
439
440
441static unsigned long unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
442 unsigned long address, unsigned long size, unsigned long offset,
443 swp_entry_t entry, struct page *page)
444{
445 pte_t * pte;
446 unsigned long end;
447 pte_t swp_pte = swp_entry_to_pte(entry);
448
449 if (pmd_none(*dir))
450 return 0;
451 if (pmd_bad(*dir)) {
452 pmd_ERROR(*dir);
453 pmd_clear(dir);
454 return 0;
455 }
456 pte = pte_offset_map(dir, address);
457 offset += address & PMD_MASK;
458 address &= ~PMD_MASK;
459 end = address + size;
460 if (end > PMD_SIZE)
461 end = PMD_SIZE;
462 do {
463
464
465
466
467 if (unlikely(pte_same(*pte, swp_pte))) {
468 unuse_pte(vma, offset + address, pte, entry, page);
469 pte_unmap(pte);
470
471
472
473
474
475 activate_page(page);
476
477
478 return 1 + offset + address;
479 }
480 address += PAGE_SIZE;
481 pte++;
482 } while (address && (address < end));
483 pte_unmap(pte - 1);
484 return 0;
485}
486
487
488static unsigned long unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
489 unsigned long address, unsigned long size,
490 swp_entry_t entry, struct page *page)
491{
492 pmd_t * pmd;
493 unsigned long offset, end;
494 unsigned long foundaddr;
495
496 if (pgd_none(*dir))
497 return 0;
498 if (pgd_bad(*dir)) {
499 pgd_ERROR(*dir);
500 pgd_clear(dir);
501 return 0;
502 }
503 pmd = pmd_offset(dir, address);
504 offset = address & PGDIR_MASK;
505 address &= ~PGDIR_MASK;
506 end = address + size;
507 if (end > PGDIR_SIZE)
508 end = PGDIR_SIZE;
509 if (address >= end)
510 BUG();
511 do {
512 foundaddr = unuse_pmd(vma, pmd, address, end - address,
513 offset, entry, page);
514 if (foundaddr)
515 return foundaddr;
516 address = (address + PMD_SIZE) & PMD_MASK;
517 pmd++;
518 } while (address && (address < end));
519 return 0;
520}
521
522
523static unsigned long unuse_vma(struct vm_area_struct * vma,
524 swp_entry_t entry, struct page *page)
525{
526 pgd_t *pgdir;
527 unsigned long start, end;
528 unsigned long foundaddr;
529
530 if (page->mapping) {
531 start = page_address_in_vma(page, vma);
532 if (start == -EFAULT)
533 return 0;
534 else
535 end = start + PAGE_SIZE;
536 } else {
537 start = vma->vm_start;
538 end = vma->vm_end;
539 }
540 pgdir = pgd_offset(vma->vm_mm, start);
541 do {
542 foundaddr = unuse_pgd(vma, pgdir, start, end - start,
543 entry, page);
544 if (foundaddr)
545 return foundaddr;
546 start = (start + PGDIR_SIZE) & PGDIR_MASK;
547 pgdir++;
548 } while (start && (start < end));
549 return 0;
550}
551
552static int unuse_process(struct mm_struct * mm,
553 swp_entry_t entry, struct page* page)
554{
555 struct vm_area_struct* vma;
556 unsigned long foundaddr = 0;
557
558
559
560
561 if (!down_read_trylock(&mm->mmap_sem)) {
562
563
564
565
566 unlock_page(page);
567 down_read(&mm->mmap_sem);
568 lock_page(page);
569 }
570 spin_lock(&mm->page_table_lock);
571 for (vma = mm->mmap; vma; vma = vma->vm_next) {
572 if (vma->anon_vma) {
573 foundaddr = unuse_vma(vma, entry, page);
574 if (foundaddr)
575 break;
576 }
577 }
578 spin_unlock(&mm->page_table_lock);
579 up_read(&mm->mmap_sem);
580
581
582
583
584 return 0;
585}
586
587
588
589
590
591static int find_next_to_unuse(struct swap_info_struct *si, int prev)
592{
593 int max = si->max;
594 int i = prev;
595 int count;
596
597
598
599
600
601
602
603 for (;;) {
604 if (++i >= max) {
605 if (!prev) {
606 i = 0;
607 break;
608 }
609
610
611
612
613 max = prev + 1;
614 prev = 0;
615 i = 1;
616 }
617 count = si->swap_map[i];
618 if (count && count != SWAP_MAP_BAD)
619 break;
620 }
621 return i;
622}
623
624
625
626
627
628
/*
 * Bring every in-use slot of swap area @type back into memory and remove
 * the swap references to it, so that the area can be switched off.
 *
 * For each slot reported by find_next_to_unuse() the page is read into the
 * swap cache, then the mms on the mmlist are visited (unuse_process(), or
 * shmem_unuse() when the mm is init_mm) until the slot's map count has
 * dropped to 1 or the list is exhausted.
 *
 * Returns 0 on success, -EINTR if a signal is pending, -ENOMEM if a page
 * could not be read while its slot was still in use, or a non-zero value
 * returned by unuse_process().
 */
static int try_to_unuse(unsigned int type)
{
	struct swap_info_struct * si = &swap_info[type];
	struct mm_struct *start_mm;
	unsigned short *swap_map;
	unsigned short swcount;
	struct page *page;
	swp_entry_t entry;
	int i = 0;
	int retval = 0;
	int reset_overflow = 0;
	int shmem;

	/*
	 * start_mm is the mm tried first for each slot.  Begin with init_mm
	 * and hold a user reference on it, matching the mmput() calls below.
	 */
	start_mm = &init_mm;
	atomic_inc(&init_mm.mm_users);

	/* i is the slot handled last; 0 means "start from the beginning". */
	while ((i = find_next_to_unuse(si, i)) != 0) {
		if (signal_pending(current)) {
			retval = -EINTR;
			break;
		}

		/* Get the page for this slot into the swap cache. */
		swap_map = &si->swap_map[i];
		entry = swp_entry(type, i);
		page = read_swap_cache_async(entry, NULL, 0);
		if (!page) {
			/*
			 * No page: if the slot has been freed in the meantime
			 * just move on, otherwise report the failure as
			 * out of memory.
			 */
			if (!*swap_map)
				continue;
			retval = -ENOMEM;
			break;
		}

		/*
		 * We hold the only user reference on start_mm: drop it and
		 * fall back to init_mm.
		 */
		if (atomic_read(&start_mm->mm_users) == 1) {
			mmput(start_mm);
			start_mm = &init_mm;
			atomic_inc(&init_mm.mm_users);
		}

		/*
		 * Wait for the page to be unlocked and for writeback to end,
		 * then lock it and wait for writeback once more.
		 */
		wait_on_page_locked(page);
		wait_on_page_writeback(page);
		lock_page(page);
		wait_on_page_writeback(page);

		/*
		 * First attempt: remove the reference held by start_mm.
		 * swcount remembers the count seen before the attempt.
		 */
		shmem = 0;
		swcount = *swap_map;
		if (swcount > 1) {
			if (start_mm == &init_mm)
				shmem = shmem_unuse(entry, page);
			else
				retval = unuse_process(start_mm, entry, page);
		}
		/* Still referenced: go round the mmlist starting after start_mm. */
		if (*swap_map > 1) {
			/* Set when start_mm did not reduce the count. */
			int set_start_mm = (*swap_map >= swcount);
			struct list_head *p = &start_mm->mmlist;
			struct mm_struct *new_start_mm = start_mm;
			struct mm_struct *prev_mm = start_mm;
			struct mm_struct *mm;

			/* One reference for each of the two extra pointers. */
			atomic_inc(&new_start_mm->mm_users);
			atomic_inc(&prev_mm->mm_users);
			spin_lock(&mmlist_lock);
			while (*swap_map > 1 && !retval &&
					(p = p->next) != &start_mm->mmlist) {
				mm = list_entry(p, struct mm_struct, mmlist);
				/*
				 * Take a reference on mm before dropping
				 * mmlist_lock; prev_mm's reference is released
				 * only after the lock has been dropped.
				 */
				atomic_inc(&mm->mm_users);
				spin_unlock(&mmlist_lock);
				mmput(prev_mm);
				prev_mm = mm;

				cond_resched();

				swcount = *swap_map;
				if (swcount <= 1)
					;
				else if (mm == &init_mm) {
					set_start_mm = 1;
					shmem = shmem_unuse(entry, page);
				} else
					retval = unuse_process(mm, entry, page);
				/*
				 * This mm reduced the count: make it the
				 * start_mm for the following slots.
				 */
				if (set_start_mm && *swap_map < swcount) {
					mmput(new_start_mm);
					atomic_inc(&mm->mm_users);
					new_start_mm = mm;
					set_start_mm = 0;
				}
				spin_lock(&mmlist_lock);
			}
			spin_unlock(&mmlist_lock);
			mmput(prev_mm);
			mmput(start_mm);
			start_mm = new_start_mm;
		}
		if (retval) {
			unlock_page(page);
			page_cache_release(page);
			break;
		}

		/*
		 * A count of SWAP_MAP_MAX is never decremented by
		 * swap_entry_free(), so force it down to 1 here and remember
		 * to report that the overflow state was cleared.
		 */
		if (*swap_map == SWAP_MAP_MAX) {
			swap_device_lock(si);
			*swap_map = 1;
			swap_device_unlock(si);
			reset_overflow = 1;
		}

		/*
		 * The slot is still referenced from somewhere the walk above
		 * did not reach, and the cached page is dirty: write it to
		 * swap.  NOTE(review): the lock_page() afterwards suggests
		 * swap_writepage() returns with the page unlocked - confirm.
		 */
		if ((*swap_map > 1) && PageDirty(page) && PageSwapCache(page)) {
			struct writeback_control wbc = {
				.sync_mode = WB_SYNC_NONE,
			};

			swap_writepage(page, &wbc);
			lock_page(page);
			wait_on_page_writeback(page);
		}
		/*
		 * When shmem_unuse() returned non-zero an extra swap
		 * reference is taken instead of removing the page from the
		 * swap cache.  NOTE(review): presumably shmem has taken over
		 * the page in that case - confirm against shmem_unuse().
		 */
		if (PageSwapCache(page)) {
			if (shmem)
				swap_duplicate(entry);
			else
				delete_from_swap_cache(page);
		}

		/* Mark the page dirty before letting go of it. */
		SetPageDirty(page);
		unlock_page(page);
		page_cache_release(page);

		/* Give other tasks a chance to run between slots. */
		cond_resched();
	}

	mmput(start_mm);
	if (reset_overflow) {
		printk(KERN_WARNING "swapoff: cleared swap entry overflow\n");
		swap_overflow = 0;
	}
	return retval;
}
859
860
861
862
863
864sector_t map_swap_page(struct swap_info_struct *sis, pgoff_t offset)
865{
866 struct swap_extent *se = sis->curr_swap_extent;
867 struct swap_extent *start_se = se;
868
869 for ( ; ; ) {
870 struct list_head *lh;
871
872 if (se->start_page <= offset &&
873 offset < (se->start_page + se->nr_pages)) {
874 return se->start_block + (offset - se->start_page);
875 }
876 lh = se->list.prev;
877 if (lh == &sis->extent_list)
878 lh = lh->prev;
879 se = list_entry(lh, struct swap_extent, list);
880 sis->curr_swap_extent = se;
881 BUG_ON(se == start_se);
882 }
883}
884
885
886
887
888static void destroy_swap_extents(struct swap_info_struct *sis)
889{
890 while (!list_empty(&sis->extent_list)) {
891 struct swap_extent *se;
892
893 se = list_entry(sis->extent_list.next,
894 struct swap_extent, list);
895 list_del(&se->list);
896 kfree(se);
897 }
898 sis->nr_extents = 0;
899}
900
901
902
903
904
905
906
907
908static int
909add_swap_extent(struct swap_info_struct *sis, unsigned long start_page,
910 unsigned long nr_pages, sector_t start_block)
911{
912 struct swap_extent *se;
913 struct swap_extent *new_se;
914 struct list_head *lh;
915
916 lh = sis->extent_list.next;
917 while (lh != &sis->extent_list) {
918 se = list_entry(lh, struct swap_extent, list);
919 if (se->start_block + se->nr_pages == start_block &&
920 se->start_page + se->nr_pages == start_page) {
921
922 se->nr_pages += nr_pages;
923 return 0;
924 }
925 lh = lh->next;
926 }
927
928
929
930
931 new_se = kmalloc(sizeof(*se), GFP_KERNEL);
932 if (new_se == NULL)
933 return -ENOMEM;
934 new_se->start_page = start_page;
935 new_se->nr_pages = nr_pages;
936 new_se->start_block = start_block;
937
938 lh = sis->extent_list.prev;
939 while (lh != &sis->extent_list) {
940 se = list_entry(lh, struct swap_extent, list);
941 if (se->start_block > start_block)
942 break;
943 lh = lh->prev;
944 }
945 list_add_tail(&new_se->list, lh);
946 sis->nr_extents++;
947 return 0;
948}
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981static int setup_swap_extents(struct swap_info_struct *sis)
982{
983 struct inode *inode;
984 unsigned blocks_per_page;
985 unsigned long page_no;
986 unsigned blkbits;
987 sector_t probe_block;
988 sector_t last_block;
989 int ret;
990
991 inode = sis->swap_file->f_mapping->host;
992 if (S_ISBLK(inode->i_mode)) {
993 ret = add_swap_extent(sis, 0, sis->max, 0);
994 goto done;
995 }
996
997 blkbits = inode->i_blkbits;
998 blocks_per_page = PAGE_SIZE >> blkbits;
999
1000
1001
1002
1003
1004 probe_block = 0;
1005 page_no = 0;
1006 last_block = i_size_read(inode) >> blkbits;
1007 while ((probe_block + blocks_per_page) <= last_block &&
1008 page_no < sis->max) {
1009 unsigned block_in_page;
1010 sector_t first_block;
1011
1012 first_block = bmap(inode, probe_block);
1013 if (first_block == 0)
1014 goto bad_bmap;
1015
1016
1017
1018
1019 if (first_block & (blocks_per_page - 1)) {
1020 probe_block++;
1021 goto reprobe;
1022 }
1023
1024 for (block_in_page = 1; block_in_page < blocks_per_page;
1025 block_in_page++) {
1026 sector_t block;
1027
1028 block = bmap(inode, probe_block + block_in_page);
1029 if (block == 0)
1030 goto bad_bmap;
1031 if (block != first_block + block_in_page) {
1032
1033 probe_block++;
1034 goto reprobe;
1035 }
1036 }
1037
1038
1039
1040
1041 ret = add_swap_extent(sis, page_no, 1,
1042 first_block >> (PAGE_SHIFT - blkbits));
1043 if (ret)
1044 goto out;
1045 page_no++;
1046 probe_block += blocks_per_page;
1047reprobe:
1048 continue;
1049 }
1050 ret = 0;
1051 if (page_no == 0)
1052 ret = -EINVAL;
1053 sis->max = page_no;
1054 sis->highest_bit = page_no - 1;
1055done:
1056 sis->curr_swap_extent = list_entry(sis->extent_list.prev,
1057 struct swap_extent, list);
1058 goto out;
1059bad_bmap:
1060 printk(KERN_ERR "swapon: swapfile has holes\n");
1061 ret = -EINVAL;
1062out:
1063 return ret;
1064}
1065
/*
 * Compiled out by the "#if 0" below.
 *
 * Reports whether the backing device of a locked page is write-congested.
 * For a swap-cache page the bdi is taken from the block device of the
 * swap area named by page->private; otherwise from the page's mapping.
 */
#if 0
#include <linux/backing-dev.h>
int page_queue_congested(struct page *page)
{
	struct backing_dev_info *bdi;

	BUG_ON(!PageLocked(page));

	if (PageSwapCache(page)) {
		swp_entry_t entry = { .val = page->private };
		struct swap_info_struct *sis;

		sis = get_swap_info_struct(swp_type(entry));
		bdi = sis->bdev->bd_inode->i_mapping->backing_dev_info;
	} else
		bdi = page->mapping->backing_dev_info;
	return bdi_write_congested(bdi);
}
#endif
1085
/*
 * swapoff system call: stop using the swap area backed by @specialfile.
 *
 * Steps: open the file, find the active area sharing its mapping, unlink
 * the area from the swap list and clear SWP_WRITEOK, bring all of its
 * pages back with try_to_unuse(), then release the area's resources.
 * If try_to_unuse() fails the area is put back on the list.
 *
 * Returns 0 on success; -EPERM without CAP_SYS_ADMIN; the getname() or
 * filp_open() error; -EINVAL if no active area matches; -ENOMEM if
 * security_vm_enough_memory() refuses; or the try_to_unuse() error.
 */
asmlinkage long sys_swapoff(const char __user * specialfile)
{
	struct swap_info_struct * p = NULL;
	unsigned short *swap_map;
	struct file *swap_file, *victim;
	struct address_space *mapping;
	struct inode *inode;
	char * pathname;
	int i, type, prev;
	int err;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	pathname = getname(specialfile);
	err = PTR_ERR(pathname);
	if (IS_ERR(pathname))
		goto out;

	victim = filp_open(pathname, O_RDWR|O_LARGEFILE, 0);
	putname(pathname);
	err = PTR_ERR(victim);
	if (IS_ERR(victim))
		goto out;

	/* Find the active area with the same mapping; prev trails behind. */
	mapping = victim->f_mapping;
	prev = -1;
	swap_list_lock();
	for (type = swap_list.head; type >= 0; type = swap_info[type].next) {
		p = swap_info + type;
		if ((p->flags & SWP_ACTIVE) == SWP_ACTIVE) {
			if (p->swap_file->f_mapping == mapping)
				break;
		}
		prev = type;
	}
	if (type < 0) {
		err = -EINVAL;
		swap_list_unlock();
		goto out_dput;
	}
	/*
	 * Ask whether p->pages more pages of memory are available.
	 * NOTE(review): the immediate vm_unacct_memory() suggests the check
	 * charges the pages on success - confirm.
	 */
	if (!security_vm_enough_memory(p->pages))
		vm_unacct_memory(p->pages);
	else {
		err = -ENOMEM;
		swap_list_unlock();
		goto out_dput;
	}
	/* Unlink the area from the list. */
	if (prev < 0) {
		swap_list.head = p->next;
	} else {
		swap_info[prev].next = p->next;
	}
	if (type == swap_list.next) {
		/* The allocation cursor pointed at us: restart from the head. */
		swap_list.next = swap_list.head;
	}
	nr_swap_pages -= p->pages;
	total_swap_pages -= p->pages;
	/* Without SWP_WRITEOK get_swap_page() no longer allocates here. */
	p->flags &= ~SWP_WRITEOK;
	swap_list_unlock();
	current->flags |= PF_SWAPOFF;
	err = try_to_unuse(type);
	current->flags &= ~PF_SWAPOFF;

	/* Wait for any swap_unplug_io_fn() still holding the read side. */
	down_write(&swap_unplug_sem);
	up_write(&swap_unplug_sem);

	if (err) {
		/* Failed: re-insert the area at its priority position. */
		swap_list_lock();
		for (prev = -1, i = swap_list.head; i >= 0; prev = i, i = swap_info[i].next)
			if (p->prio >= swap_info[i].prio)
				break;
		p->next = i;
		if (prev < 0)
			swap_list.head = swap_list.next = p - swap_info;
		else
			swap_info[prev].next = p - swap_info;
		nr_swap_pages += p->pages;
		total_swap_pages += p->pages;
		p->flags |= SWP_WRITEOK;
		swap_list_unlock();
		goto out_dput;
	}
	/* Detach everything from the slot under all three locks. */
	down(&swapon_sem);
	swap_list_lock();
	swap_device_lock(p);
	swap_file = p->swap_file;
	p->swap_file = NULL;
	p->max = 0;
	swap_map = p->swap_map;
	p->swap_map = NULL;
	p->flags = 0;
	destroy_swap_extents(p);
	swap_device_unlock(p);
	swap_list_unlock();
	up(&swapon_sem);
	/* Free the map only after the locks have been dropped. */
	vfree(swap_map);
	inode = mapping->host;
	if (S_ISBLK(inode->i_mode)) {
		/* Undo sys_swapon(): restore the block size, drop the claim. */
		struct block_device *bdev = I_BDEV(inode);
		set_blocksize(bdev, p->old_block_size);
		bd_release(bdev);
	} else {
		/* Regular file: clear the flag set by sys_swapon(). */
		down(&inode->i_sem);
		inode->i_flags &= ~S_SWAPFILE;
		up(&inode->i_sem);
	}
	filp_close(swap_file, NULL);
	err = 0;

out_dput:
	filp_close(victim, NULL);
out:
	return err;
}
1204
1205#ifdef CONFIG_PROC_FS
1206
1207static void *swap_start(struct seq_file *swap, loff_t *pos)
1208{
1209 struct swap_info_struct *ptr = swap_info;
1210 int i;
1211 loff_t l = *pos;
1212
1213 down(&swapon_sem);
1214
1215 for (i = 0; i < nr_swapfiles; i++, ptr++) {
1216 if (!(ptr->flags & SWP_USED) || !ptr->swap_map)
1217 continue;
1218 if (!l--)
1219 return ptr;
1220 }
1221
1222 return NULL;
1223}
1224
1225static void *swap_next(struct seq_file *swap, void *v, loff_t *pos)
1226{
1227 struct swap_info_struct *ptr = v;
1228 struct swap_info_struct *endptr = swap_info + nr_swapfiles;
1229
1230 for (++ptr; ptr < endptr; ptr++) {
1231 if (!(ptr->flags & SWP_USED) || !ptr->swap_map)
1232 continue;
1233 ++*pos;
1234 return ptr;
1235 }
1236
1237 return NULL;
1238}
1239
1240static void swap_stop(struct seq_file *swap, void *v)
1241{
1242 up(&swapon_sem);
1243}
1244
1245static int swap_show(struct seq_file *swap, void *v)
1246{
1247 struct swap_info_struct *ptr = v;
1248 struct file *file;
1249 int len;
1250
1251 if (v == swap_info)
1252 seq_puts(swap, "Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
1253
1254 file = ptr->swap_file;
1255 len = seq_path(swap, file->f_vfsmnt, file->f_dentry, " \t\n\\");
1256 seq_printf(swap, "%*s%s\t%d\t%ld\t%d\n",
1257 len < 40 ? 40 - len : 1, " ",
1258 S_ISBLK(file->f_dentry->d_inode->i_mode) ?
1259 "partition" : "file\t",
1260 ptr->pages << (PAGE_SHIFT - 10),
1261 ptr->inuse_pages << (PAGE_SHIFT - 10),
1262 ptr->prio);
1263 return 0;
1264}
1265
/* Iterator callbacks behind /proc/swaps; installed by swaps_open(). */
static struct seq_operations swaps_op = {
	.start =	swap_start,
	.next =		swap_next,
	.stop =		swap_stop,
	.show =		swap_show
};
1272
1273static int swaps_open(struct inode *inode, struct file *file)
1274{
1275 return seq_open(file, &swaps_op);
1276}
1277
/*
 * File operations for the "swaps" proc entry: our own open, everything
 * else delegated to the generic seq_file helpers.
 */
static struct file_operations proc_swaps_operations = {
	.open		= swaps_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
1284
1285static int __init procswaps_init(void)
1286{
1287 struct proc_dir_entry *entry;
1288
1289 entry = create_proc_entry("swaps", 0, NULL);
1290 if (entry)
1291 entry->proc_fops = &proc_swaps_operations;
1292 return 0;
1293}
1294__initcall(procswaps_init);
1295#endif
1296
1297
1298
1299
1300
1301
1302asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
1303{
1304 struct swap_info_struct * p;
1305 char *name = NULL;
1306 struct block_device *bdev = NULL;
1307 struct file *swap_file = NULL;
1308 struct address_space *mapping;
1309 unsigned int type;
1310 int i, prev;
1311 int error;
1312 static int least_priority;
1313 union swap_header *swap_header = NULL;
1314 int swap_header_version;
1315 int nr_good_pages = 0;
1316 unsigned long maxpages = 1;
1317 int swapfilesize;
1318 unsigned short *swap_map;
1319 struct page *page = NULL;
1320 struct inode *inode = NULL;
1321 int did_down = 0;
1322
1323 if (!capable(CAP_SYS_ADMIN))
1324 return -EPERM;
1325 swap_list_lock();
1326 p = swap_info;
1327 for (type = 0 ; type < nr_swapfiles ; type++,p++)
1328 if (!(p->flags & SWP_USED))
1329 break;
1330 error = -EPERM;
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343 if (type > swp_type(pte_to_swp_entry(swp_entry_to_pte(swp_entry(~0UL,0))))) {
1344 swap_list_unlock();
1345 goto out;
1346 }
1347 if (type >= nr_swapfiles)
1348 nr_swapfiles = type+1;
1349 INIT_LIST_HEAD(&p->extent_list);
1350 p->flags = SWP_USED;
1351 p->nr_extents = 0;
1352 p->swap_file = NULL;
1353 p->old_block_size = 0;
1354 p->swap_map = NULL;
1355 p->lowest_bit = 0;
1356 p->highest_bit = 0;
1357 p->cluster_nr = 0;
1358 p->inuse_pages = 0;
1359 p->sdev_lock = SPIN_LOCK_UNLOCKED;
1360 p->next = -1;
1361 if (swap_flags & SWAP_FLAG_PREFER) {
1362 p->prio =
1363 (swap_flags & SWAP_FLAG_PRIO_MASK)>>SWAP_FLAG_PRIO_SHIFT;
1364 } else {
1365 p->prio = --least_priority;
1366 }
1367 swap_list_unlock();
1368 name = getname(specialfile);
1369 error = PTR_ERR(name);
1370 if (IS_ERR(name)) {
1371 name = NULL;
1372 goto bad_swap_2;
1373 }
1374 swap_file = filp_open(name, O_RDWR|O_LARGEFILE, 0);
1375 error = PTR_ERR(swap_file);
1376 if (IS_ERR(swap_file)) {
1377 swap_file = NULL;
1378 goto bad_swap_2;
1379 }
1380
1381 p->swap_file = swap_file;
1382 mapping = swap_file->f_mapping;
1383 inode = mapping->host;
1384
1385 error = -EBUSY;
1386 for (i = 0; i < nr_swapfiles; i++) {
1387 struct swap_info_struct *q = &swap_info[i];
1388
1389 if (i == type || !q->swap_file)
1390 continue;
1391 if (mapping == q->swap_file->f_mapping)
1392 goto bad_swap;
1393 }
1394
1395 error = -EINVAL;
1396 if (S_ISBLK(inode->i_mode)) {
1397 bdev = I_BDEV(inode);
1398 error = bd_claim(bdev, sys_swapon);
1399 if (error < 0) {
1400 bdev = NULL;
1401 goto bad_swap;
1402 }
1403 p->old_block_size = block_size(bdev);
1404 error = set_blocksize(bdev, PAGE_SIZE);
1405 if (error < 0)
1406 goto bad_swap;
1407 p->bdev = bdev;
1408 } else if (S_ISREG(inode->i_mode)) {
1409 p->bdev = inode->i_sb->s_bdev;
1410 down(&inode->i_sem);
1411 did_down = 1;
1412 if (IS_SWAPFILE(inode)) {
1413 error = -EBUSY;
1414 goto bad_swap;
1415 }
1416 } else {
1417 goto bad_swap;
1418 }
1419
1420 swapfilesize = i_size_read(inode) >> PAGE_SHIFT;
1421
1422
1423
1424
1425 if (!mapping->a_ops->readpage) {
1426 error = -EINVAL;
1427 goto bad_swap;
1428 }
1429 page = read_cache_page(mapping, 0,
1430 (filler_t *)mapping->a_ops->readpage, swap_file);
1431 if (IS_ERR(page)) {
1432 error = PTR_ERR(page);
1433 goto bad_swap;
1434 }
1435 wait_on_page_locked(page);
1436 if (!PageUptodate(page))
1437 goto bad_swap;
1438 kmap(page);
1439 swap_header = page_address(page);
1440
1441 if (!memcmp("SWAP-SPACE",swap_header->magic.magic,10))
1442 swap_header_version = 1;
1443 else if (!memcmp("SWAPSPACE2",swap_header->magic.magic,10))
1444 swap_header_version = 2;
1445 else {
1446 printk("Unable to find swap-space signature\n");
1447 error = -EINVAL;
1448 goto bad_swap;
1449 }
1450
1451 switch (swap_header_version) {
1452 case 1:
1453 printk(KERN_ERR "version 0 swap is no longer supported. "
1454 "Use mkswap -v1 %s\n", name);
1455 error = -EINVAL;
1456 goto bad_swap;
1457 case 2:
1458
1459
1460 if (swap_header->info.version != 1) {
1461 printk(KERN_WARNING
1462 "Unable to handle swap header version %d\n",
1463 swap_header->info.version);
1464 error = -EINVAL;
1465 goto bad_swap;
1466 }
1467
1468 p->lowest_bit = 1;
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483 maxpages = swp_offset(pte_to_swp_entry(swp_entry_to_pte(swp_entry(0,~0UL)))) - 1;
1484 if (maxpages > swap_header->info.last_page)
1485 maxpages = swap_header->info.last_page;
1486 p->highest_bit = maxpages - 1;
1487
1488 error = -EINVAL;
1489 if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES)
1490 goto bad_swap;
1491
1492
1493 if (!(p->swap_map = vmalloc(maxpages * sizeof(short)))) {
1494 error = -ENOMEM;
1495 goto bad_swap;
1496 }
1497
1498 error = 0;
1499 memset(p->swap_map, 0, maxpages * sizeof(short));
1500 for (i=0; i<swap_header->info.nr_badpages; i++) {
1501 int page = swap_header->info.badpages[i];
1502 if (page <= 0 || page >= swap_header->info.last_page)
1503 error = -EINVAL;
1504 else
1505 p->swap_map[page] = SWAP_MAP_BAD;
1506 }
1507 nr_good_pages = swap_header->info.last_page -
1508 swap_header->info.nr_badpages -
1509 1 ;
1510 if (error)
1511 goto bad_swap;
1512 }
1513
1514 if (swapfilesize && maxpages > swapfilesize) {
1515 printk(KERN_WARNING
1516 "Swap area shorter than signature indicates\n");
1517 error = -EINVAL;
1518 goto bad_swap;
1519 }
1520 if (!nr_good_pages) {
1521 printk(KERN_WARNING "Empty swap-file\n");
1522 error = -EINVAL;
1523 goto bad_swap;
1524 }
1525 p->swap_map[0] = SWAP_MAP_BAD;
1526 p->max = maxpages;
1527 p->pages = nr_good_pages;
1528
1529 error = setup_swap_extents(p);
1530 if (error)
1531 goto bad_swap;
1532
1533 down(&swapon_sem);
1534 swap_list_lock();
1535 swap_device_lock(p);
1536 p->flags = SWP_ACTIVE;
1537 nr_swap_pages += nr_good_pages;
1538 total_swap_pages += nr_good_pages;
1539 printk(KERN_INFO "Adding %dk swap on %s. Priority:%d extents:%d\n",
1540 nr_good_pages<<(PAGE_SHIFT-10), name,
1541 p->prio, p->nr_extents);
1542
1543
1544 prev = -1;
1545 for (i = swap_list.head; i >= 0; i = swap_info[i].next) {
1546 if (p->prio >= swap_info[i].prio) {
1547 break;
1548 }
1549 prev = i;
1550 }
1551 p->next = i;
1552 if (prev < 0) {
1553 swap_list.head = swap_list.next = p - swap_info;
1554 } else {
1555 swap_info[prev].next = p - swap_info;
1556 }
1557 swap_device_unlock(p);
1558 swap_list_unlock();
1559 up(&swapon_sem);
1560 error = 0;
1561 goto out;
1562bad_swap:
1563 if (bdev) {
1564 set_blocksize(bdev, p->old_block_size);
1565 bd_release(bdev);
1566 }
1567bad_swap_2:
1568 swap_list_lock();
1569 swap_map = p->swap_map;
1570 p->swap_file = NULL;
1571 p->swap_map = NULL;
1572 p->flags = 0;
1573 if (!(swap_flags & SWAP_FLAG_PREFER))
1574 ++least_priority;
1575 swap_list_unlock();
1576 destroy_swap_extents(p);
1577 if (swap_map)
1578 vfree(swap_map);
1579 if (swap_file)
1580 filp_close(swap_file, NULL);
1581out:
1582 if (page && !IS_ERR(page)) {
1583 kunmap(page);
1584 page_cache_release(page);
1585 }
1586 if (name)
1587 putname(name);
1588 if (did_down) {
1589 if (!error)
1590 inode->i_flags |= S_SWAPFILE;
1591 up(&inode->i_sem);
1592 }
1593 return error;
1594}
1595
1596void si_swapinfo(struct sysinfo *val)
1597{
1598 unsigned int i;
1599 unsigned long nr_to_be_unused = 0;
1600
1601 swap_list_lock();
1602 for (i = 0; i < nr_swapfiles; i++) {
1603 if (!(swap_info[i].flags & SWP_USED) ||
1604 (swap_info[i].flags & SWP_WRITEOK))
1605 continue;
1606 nr_to_be_unused += swap_info[i].inuse_pages;
1607 }
1608 val->freeswap = nr_swap_pages + nr_to_be_unused;
1609 val->totalswap = total_swap_pages + nr_to_be_unused;
1610 swap_list_unlock();
1611}
1612
1613
1614
1615
1616
1617
1618
1619int swap_duplicate(swp_entry_t entry)
1620{
1621 struct swap_info_struct * p;
1622 unsigned long offset, type;
1623 int result = 0;
1624
1625 type = swp_type(entry);
1626 if (type >= nr_swapfiles)
1627 goto bad_file;
1628 p = type + swap_info;
1629 offset = swp_offset(entry);
1630
1631 swap_device_lock(p);
1632 if (offset < p->max && p->swap_map[offset]) {
1633 if (p->swap_map[offset] < SWAP_MAP_MAX - 1) {
1634 p->swap_map[offset]++;
1635 result = 1;
1636 } else if (p->swap_map[offset] <= SWAP_MAP_MAX) {
1637 if (swap_overflow++ < 5)
1638 printk(KERN_WARNING "swap_dup: swap entry overflow\n");
1639 p->swap_map[offset] = SWAP_MAP_MAX;
1640 result = 1;
1641 }
1642 }
1643 swap_device_unlock(p);
1644out:
1645 return result;
1646
1647bad_file:
1648 printk(KERN_ERR "swap_dup: %s%08lx\n", Bad_file, entry.val);
1649 goto out;
1650}
1651
1652struct swap_info_struct *
1653get_swap_info_struct(unsigned type)
1654{
1655 return &swap_info[type];
1656}
1657
1658
1659
1660
1661
1662int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
1663{
1664 int ret = 0, i = 1 << page_cluster;
1665 unsigned long toff;
1666 struct swap_info_struct *swapdev = swp_type(entry) + swap_info;
1667
1668 if (!page_cluster)
1669 return 0;
1670 toff = (swp_offset(entry) >> page_cluster) << page_cluster;
1671 if (!toff)
1672 toff++, i--;
1673 *offset = toff;
1674
1675 swap_device_lock(swapdev);
1676 do {
1677
1678 if (toff >= swapdev->max)
1679 break;
1680
1681 if (!swapdev->swap_map[toff])
1682 break;
1683 if (swapdev->swap_map[toff] == SWAP_MAP_BAD)
1684 break;
1685 toff++;
1686 ret++;
1687 } while (--i);
1688 swap_device_unlock(swapdev);
1689 return ret;
1690}
1691