1
2
3
4
5
6
7
8
9
10
11
12
13#include <linux/kernel.h>
14#include <linux/module.h>
15#include <linux/spinlock.h>
16#include <linux/fs.h>
17#include <linux/mm.h>
18#include <linux/swap.h>
19#include <linux/slab.h>
20#include <linux/pagemap.h>
21#include <linux/writeback.h>
22#include <linux/init.h>
23#include <linux/backing-dev.h>
24#include <linux/task_io_accounting_ops.h>
25#include <linux/blkdev.h>
26#include <linux/mpage.h>
27#include <linux/percpu.h>
28#include <linux/notifier.h>
29#include <linux/smp.h>
30#include <linux/sysctl.h>
31#include <linux/cpu.h>
32#include <linux/syscalls.h>
33
34
35
36
37
38
39
40
/*
 * Number of pages requested from a single writeback_inodes() pass:
 * background_writeout() and wb_kupdate() both load this value into
 * wbc.nr_to_write before every pass.
 */
int max_writeback_pages = 1024;
42
43
44
45
46
/*
 * Number of balance_dirty_pages_ratelimited() calls a CPU may make before
 * balance_dirty_pages() is actually run.  Recomputed by set_ratelimit().
 */
static long ratelimit_pages = 32;

/* Page count captured in page_writeback_init(); input to set_ratelimit(). */
static long total_pages;
/*
 * Set by balance_dirty_pages() while the dirty threshold is exceeded and
 * cleared again once it is not.  While set, the ratelimit in
 * balance_dirty_pages_ratelimited() drops to 8 and background_writeout()
 * does not stop early.
 */
static int dirty_exceeded;
51
52
53
54
55
56
57
58static inline long sync_writeback_pages(void)
59{
60 return ratelimit_pages + ratelimit_pages / 2;
61}
62
63
64
65
66
67
/*
 * Percentage of available memory used by get_dirty_limits() as the
 * background writeback threshold.
 */
int dirty_background_ratio = 10;

/*
 * Percentage of available memory used by get_dirty_limits() as the hard
 * dirty threshold.  Clamped to the range 1..100 there.
 */
int vm_dirty_ratio = 40;

/*
 * When non-zero, get_dirty_limits() caps the dirty threshold at
 * vm_max_queue_depth << (20 - PAGE_SHIFT) pages.
 * NOTE(review): that shift suggests the value is in megabytes -- confirm.
 */
int vm_max_queue_depth = 0;

/*
 * When zero, get_dirty_limits() skips highmem zones (CONFIG_HIGHMEM only)
 * and subtracts the non-highmem mapped pages from available memory.
 */
int write_mapped = 1;
78
79
80
81
82
/*
 * Interval, in hundredths of a second, used when (re)arming wb_timer.
 * A value of zero stops wb_kupdate() from re-arming the timer.
 */
int dirty_writeback_centisecs = 5 * 100;

/*
 * Age, in hundredths of a second, that wb_kupdate() subtracts from jiffies
 * to form the older_than_this cutoff handed to writeback_inodes().
 */
int dirty_expire_centisecs = 30 * 100;
89
90
91
92
/*
 * Not referenced anywhere else in this file.
 * NOTE(review): presumably a debugging switch consumed by the block layer
 * -- confirm against its users.
 */
int block_dump;

/*
 * Laptop-mode switch.  balance_dirty_pages() tests it as a boolean and
 * laptop_io_completion() multiplies it by HZ to get the delay of
 * laptop_mode_wb_timer, i.e. the value acts as a number of seconds there.
 */
int laptop_mode;

EXPORT_SYMBOL(laptop_mode);

/*
 * Not referenced anywhere else in this file; exported for other users.
 * NOTE(review): semantics cannot be determined from here -- confirm
 * against the NFS code that reads it.
 */
int nfs_writeback_lowmem_only = 0;

EXPORT_SYMBOL(nfs_writeback_lowmem_only);
105
106
107
/*
 * Sampled by get_writeback_state() and subtracted from nr_mapped in
 * get_dirty_limits() when write_mapped is zero.  Never modified in this
 * file.
 * NOTE(review): presumably a count of mapped highmem pages maintained
 * elsewhere -- confirm.
 */
atomic_t nr_mapped_high = ATOMIC_INIT(0);

/* Forward declaration: balance_dirty_pages() queues this before it is defined. */
static void background_writeout(unsigned long _min_pages);
111
/*
 * Snapshot of the page counters the writeback heuristics work from.
 * Filled in by get_writeback_state().
 */
struct writeback_state
{
	unsigned long nr_dirty;		/* from read_page_state(nr_dirty) */
	unsigned long nr_unstable;	/* from read_page_state(nr_unstable) */
	unsigned long nr_mapped;	/* from read_page_state(nr_mapped) */
	unsigned long nr_mapped_high;	/* from the nr_mapped_high atomic */
	unsigned long nr_writeback;	/* from read_page_state(nr_writeback) */
};
120
121static void get_writeback_state(struct writeback_state *wbs)
122{
123 wbs->nr_dirty = read_page_state(nr_dirty);
124 wbs->nr_unstable = read_page_state(nr_unstable);
125 wbs->nr_mapped = read_page_state(nr_mapped);
126 wbs->nr_mapped_high = atomic_read(&nr_mapped_high);
127 wbs->nr_writeback = read_page_state(nr_writeback);
128}
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
/*
 * Refresh *wbs with the current page counters and compute the two
 * writeback thresholds, both expressed in pages.
 *
 * @wbs:         filled in via get_writeback_state()
 * @pbackground: receives the background writeback threshold
 * @pdirty:      receives the hard dirty threshold
 *
 * Both thresholds are percentages (dirty_background_ratio, vm_dirty_ratio)
 * of an "available memory" estimate built from the per-zone active,
 * inactive and free page counts plus a tenth of the slab pages.
 */
static void
get_dirty_limits(struct writeback_state *wbs, long *pbackground, long *pdirty)
{
	int background_ratio;
	int dirty_ratio = vm_dirty_ratio;
	long background;
	long dirty;
	unsigned long available_memory;
	unsigned long mapped;
	struct task_struct *tsk;
	struct zone *zone;

	get_writeback_state(wbs);

	/* Seed the estimate with one tenth of the slab pages. */
	available_memory = read_page_state(nr_slab) / 10;

	for_each_zone(zone) {
#ifdef CONFIG_HIGHMEM
		/* With write_mapped off, highmem zones do not count. */
		if (!write_mapped && is_highmem(zone))
			continue;
#endif
		available_memory += zone->nr_active;
		available_memory += zone->nr_inactive;
		available_memory += zone->free_pages;
	}
	if (!write_mapped) {
		/*
		 * Discount mapped pages other than those counted in
		 * nr_mapped_high; both subtractions are guarded so the
		 * unsigned values cannot wrap, and at least 1 page is left.
		 */
		if (wbs->nr_mapped > wbs->nr_mapped_high)
			mapped = wbs->nr_mapped - wbs->nr_mapped_high;
		else
			mapped = 0;
		if (mapped < available_memory)
			available_memory -= mapped;
		else
			available_memory = 1;
	}

	/* Clamp the hard ratio to 1..100 percent. */
	if (dirty_ratio > 100)
		dirty_ratio = 100;
	if (dirty_ratio < 1)
		dirty_ratio = 1;

	/*
	 * Keep the background ratio below the hard ratio: halve the hard
	 * ratio, unless it is already 1, in which case both are equal.
	 */
	background_ratio = dirty_background_ratio;
	if (background_ratio >= dirty_ratio)
		background_ratio = ((dirty_ratio>1)?dirty_ratio/2:dirty_ratio);

	background = (background_ratio * available_memory) / 100;
	dirty = (dirty_ratio * available_memory) / 100;
	tsk = current;
	/* PF_LESS_THROTTLE and real-time tasks get 25% higher thresholds. */
	if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) {
		background += background / 4;
		dirty += dirty / 4;
	}

	/*
	 * Optional absolute cap on the hard threshold.  If the cap drops it
	 * below the background threshold, pull the background threshold
	 * down as well (to half the cap when the ratios differ).
	 */
	if (vm_max_queue_depth && (vm_max_queue_depth<<(20-PAGE_SHIFT)) < dirty) {
		dirty = vm_max_queue_depth<<(20-PAGE_SHIFT);
		if (background > dirty) {
			if (dirty_ratio > background_ratio)
				background = dirty/2;
			else
				background = dirty;
		}
	}
	*pbackground = background;
	*pdirty = dirty;
}
225
226
227
228
229
230
231
232
/*
 * Throttle the caller while the system is over the hard dirty threshold.
 *
 * @mapping: only its backing_dev_info is used, to ask whether writeback is
 *           already in progress on that device.
 *
 * The caller does not write pages itself here: it kicks pdflush and sleeps
 * in blk_congestion_wait() until either the total drops to the threshold
 * or it has watched the total shrink by at least one write chunk.
 */
static void balance_dirty_pages(struct address_space *mapping)
{
	struct writeback_state wbs;
	long nr_reclaimable;
	long background_thresh;
	long dirty_thresh;
	unsigned long pages_written = 0;
	unsigned long write_chunk = sync_writeback_pages();
	struct backing_dev_info *bdi = mapping->backing_dev_info;
	long nr_dirty_p = -1;	/* total seen on the previous iteration */
	long nr_dirty;

	for (;;) {
		get_dirty_limits(&wbs, &background_thresh,
					&dirty_thresh);
		nr_reclaimable = wbs.nr_dirty + wbs.nr_unstable;
		/* The throttling total also counts pages under writeback. */
		nr_dirty = nr_reclaimable + wbs.nr_writeback;
		if (nr_dirty <= dirty_thresh)
			break;

		dirty_exceeded = 1;

		/*
		 * Credit any drop in the total since the last iteration to
		 * this caller; nr_dirty_p starts at -1 so the first pass
		 * never credits anything.
		 */
		if (nr_dirty < nr_dirty_p) {
			pages_written += nr_dirty_p - nr_dirty;
			if (pages_written >= write_chunk)
				break;
		}
		nr_dirty_p = nr_dirty;

		if (!writeback_in_progress(bdi))
			pdflush_operation(background_writeout, 0);

		blk_congestion_wait(WRITE, HZ/10);
	}

	if (nr_dirty <= dirty_thresh)
		dirty_exceeded = 0;

	if (writeback_in_progress(bdi))
		return;

	/*
	 * Start background writeback on the way out: in laptop mode only if
	 * this call saw progress, otherwise whenever the reclaimable pages
	 * exceed the background threshold.
	 */
	if ((laptop_mode && pages_written) ||
	     (!laptop_mode && (nr_reclaimable > background_thresh)))
		pdflush_operation(background_writeout, 0);
}
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300void balance_dirty_pages_ratelimited(struct address_space *mapping)
301{
302 static DEFINE_PER_CPU(int, ratelimits) = 0;
303 long ratelimit;
304
305 ratelimit = ratelimit_pages;
306 if (dirty_exceeded)
307 ratelimit = 8;
308
309
310
311
312
313 if (get_cpu_var(ratelimits)++ >= ratelimit) {
314 __get_cpu_var(ratelimits) = 0;
315 put_cpu_var(ratelimits);
316 balance_dirty_pages(mapping);
317 return;
318 }
319 put_cpu_var(ratelimits);
320}
321EXPORT_SYMBOL(balance_dirty_pages_ratelimited);
322
323void throttle_vm_writeout(void)
324{
325 struct writeback_state wbs;
326 long background_thresh;
327 long dirty_thresh;
328
329 for ( ; ; ) {
330 get_dirty_limits(&wbs, &background_thresh, &dirty_thresh);
331
332
333
334
335
336 dirty_thresh += dirty_thresh / 10;
337
338 if (wbs.nr_unstable + wbs.nr_writeback <= dirty_thresh)
339 break;
340 blk_congestion_wait(WRITE, HZ/10);
341 }
342}
343
344
345
346
347
348
/*
 * pdflush work function: write back dirty pages in passes of
 * max_writeback_pages.
 *
 * @_min_pages: number of pages the caller wants written regardless of the
 *              background threshold; 0 means "only down to the threshold".
 *
 * The loop ends when the reclaimable pages are under the background
 * threshold, min_pages is used up and dirty_exceeded is clear, or when a
 * pass came up short without hitting congestion.
 */
static void background_writeout(unsigned long _min_pages)
{
	long min_pages = _min_pages;
	/* Writeback backlog below this never triggers the throttle below. */
	long min_writeback = ratelimit_pages * 2 * num_online_cpus();
	struct writeback_control wbc = {
		.bdi		= NULL,
		.sync_mode	= WB_SYNC_NONE,
		.older_than_this = NULL,
		.nr_to_write	= 0,
		.nonblocking	= 1,
	};
	long nr_writeback_p = -1;	/* nr_writeback at the previous throttle check */
	long written = 0;		/* drop in nr_writeback observed while throttled */

	for ( ; ; ) {
		struct writeback_state wbs;
		long background_thresh;
		long dirty_thresh;

		get_dirty_limits(&wbs, &background_thresh, &dirty_thresh);
		if (wbs.nr_dirty + wbs.nr_unstable < background_thresh
				&& min_pages <= 0 && !dirty_exceeded)
			break;

		/*
		 * Too much already under writeback (at least min_writeback
		 * and more than half the hard threshold): submit nothing
		 * new until nr_writeback has been seen to shrink by at least
		 * ratelimit_pages in total.
		 */
		if (wbs.nr_writeback >= min_writeback &&
		    wbs.nr_writeback > dirty_thresh / 2) {
			long prev = nr_writeback_p;

			nr_writeback_p = wbs.nr_writeback;
			if (wbs.nr_writeback < prev)
				written += prev - wbs.nr_writeback;
			if (written < ratelimit_pages) {
				blk_congestion_wait(WRITE, HZ/10);
				continue;
			}
			written = 0;
		}
		wbc.encountered_congestion = 0;
		wbc.nr_to_write = max_writeback_pages;
		wbc.pages_skipped = 0;
		writeback_inodes(&wbc);
		/* nr_to_write now holds the unused part of the quota. */
		min_pages -= max_writeback_pages - wbc.nr_to_write;
		if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
			/* Short pass: wait, then stop unless congested or over the limit. */
			blk_congestion_wait(WRITE, HZ/10);
			if (!wbc.encountered_congestion && !dirty_exceeded)
				break;
		}
	}
}
401
402
403
404
405
406
407int wakeup_bdflush(long nr_pages)
408{
409 if (nr_pages == 0) {
410 struct writeback_state wbs;
411
412 get_writeback_state(&wbs);
413 nr_pages = wbs.nr_dirty + wbs.nr_unstable;
414 }
415 return pdflush_operation(background_writeout, nr_pages);
416}
417
/* Timer callbacks; defined further down. */
static void wb_timer_fn(unsigned long unused);
static void laptop_timer_fn(unsigned long unused);

/* Periodic writeback timer; its callback queues wb_kupdate() on pdflush. */
static struct timer_list wb_timer =
			TIMER_INITIALIZER(wb_timer_fn, 0, 0);
/* Armed by laptop_io_completion(); its callback queues laptop_flush(). */
static struct timer_list laptop_mode_wb_timer =
			TIMER_INITIALIZER(laptop_timer_fn, 0, 0);
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
/*
 * pdflush work function for the periodic writeback pass, queued by
 * wb_timer_fn().
 *
 * @arg: unused
 *
 * Syncs the superblocks, then writes back inodes dirtied more than
 * dirty_expire_centisecs ago in passes of max_writeback_pages, and finally
 * re-arms wb_timer unless dirty_writeback_centisecs is zero.
 */
static void wb_kupdate(unsigned long arg)
{
	unsigned long oldest_jif;
	unsigned long start_jif;
	unsigned long next_jif;
	long nr_to_write;
	struct writeback_state wbs;
	struct writeback_control wbc = {
		.bdi		= NULL,
		.sync_mode	= WB_SYNC_NONE,
		.older_than_this = &oldest_jif,
		.nr_to_write	= 0,
		.nonblocking	= 1,
		.for_kupdate	= 1,
	};

	sync_supers();

	get_writeback_state(&wbs);
	/* Cutoff passed to writeback_inodes() through wbc.older_than_this. */
	oldest_jif = jiffies - (dirty_expire_centisecs * HZ) / 100;
	start_jif = jiffies;
	next_jif = start_jif + (dirty_writeback_centisecs * HZ) / 100;
	/* Budget: dirty + unstable pages plus the number of in-use inodes. */
	nr_to_write = wbs.nr_dirty + wbs.nr_unstable +
			(inodes_stat.nr_inodes - inodes_stat.nr_unused);
	while (nr_to_write > 0) {
		wbc.encountered_congestion = 0;
		wbc.nr_to_write = max_writeback_pages;
		writeback_inodes(&wbc);
		if (wbc.nr_to_write > 0) {
			/* Short pass: retry after a wait only if congestion caused it. */
			if (wbc.encountered_congestion)
				blk_congestion_wait(WRITE, HZ/10);
			else
				break;
		}
		nr_to_write -= max_writeback_pages - wbc.nr_to_write;
	}
	/* Never schedule the next run less than one second from now. */
	if (time_before(next_jif, jiffies + HZ))
		next_jif = jiffies + HZ;
	if (dirty_writeback_centisecs)
		mod_timer(&wb_timer, next_jif);
}
482
483
484
485
486int dirty_writeback_centisecs_handler(ctl_table *table, int write,
487 struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
488{
489 proc_dointvec(table, write, file, buffer, length, ppos);
490 if (dirty_writeback_centisecs) {
491 mod_timer(&wb_timer,
492 jiffies + (dirty_writeback_centisecs * HZ) / 100);
493 } else {
494 del_timer(&wb_timer);
495 }
496 return 0;
497}
498
499static void wb_timer_fn(unsigned long unused)
500{
501 if (pdflush_operation(wb_kupdate, 0) < 0)
502 mod_timer(&wb_timer, jiffies + HZ);
503}
504
/*
 * pdflush work function queued by laptop_timer_fn(): performs a full
 * sys_sync().  The argument is unused.
 */
static void laptop_flush(unsigned long unused)
{
	sys_sync();
}
509
/*
 * laptop_mode_wb_timer callback: queue laptop_flush() on pdflush.  The
 * result of pdflush_operation() is ignored and the argument is unused.
 */
static void laptop_timer_fn(unsigned long unused)
{
	pdflush_operation(laptop_flush, 0);
}
514
515
516
517
518
519
520void laptop_io_completion(void)
521{
522 mod_timer(&laptop_mode_wb_timer, jiffies + laptop_mode * HZ);
523}
524
525
526
527
528
529
/*
 * Cancel a pending laptop_mode_wb_timer, if any.
 * NOTE(review): presumably called once a sync has completed, making the
 * delayed flush unnecessary -- confirm against the callers.
 */
void laptop_sync_completion(void)
{
	del_timer(&laptop_mode_wb_timer);
}
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552static void set_ratelimit(void)
553{
554 ratelimit_pages = total_pages / (num_online_cpus() * 32);
555 if (ratelimit_pages < 16)
556 ratelimit_pages = 16;
557 if (ratelimit_pages * PAGE_CACHE_SIZE > 4096 * 1024)
558 ratelimit_pages = (4096 * 1024) / PAGE_CACHE_SIZE;
559}
560
/*
 * Notifier callback installed through ratelimit_nb: recompute
 * ratelimit_pages.  All three arguments are ignored; always returns 0.
 */
static int
ratelimit_handler(struct notifier_block *self, unsigned long u, void *v)
{
	set_ratelimit();
	return 0;
}
567
/*
 * Notifier block registered with register_cpu_notifier() in
 * page_writeback_init(); routes notifications to ratelimit_handler().
 */
static struct notifier_block ratelimit_nb = {
	.notifier_call	= ratelimit_handler,
	.next		= NULL,
};
572
573
574
575
576
577
578void __init page_writeback_init(void)
579{
580 long buffer_pages = nr_free_buffer_pages();
581 long correction;
582
583 total_pages = nr_free_pagecache_pages();
584
585 correction = (100 * 4 * buffer_pages) / total_pages;
586
587 if (correction < 100) {
588 dirty_background_ratio *= correction;
589 dirty_background_ratio /= 100;
590 vm_dirty_ratio *= correction;
591 vm_dirty_ratio /= 100;
592
593 if (dirty_background_ratio <= 0)
594 dirty_background_ratio = 1;
595 if (vm_dirty_ratio <= 0)
596 vm_dirty_ratio = 1;
597 }
598 mod_timer(&wb_timer, jiffies + (dirty_writeback_centisecs * HZ) / 100);
599 set_ratelimit();
600 register_cpu_notifier(&ratelimit_nb);
601}
602
603int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
604{
605 if (wbc->nr_to_write <= 0)
606 return 0;
607 if (mapping->a_ops->writepages)
608 return mapping->a_ops->writepages(mapping, wbc);
609 return generic_writepages(mapping, wbc);
610}
611
612
613
614
615
616
617
618
619
620
621
622int write_one_page(struct page *page, int wait)
623{
624 struct address_space *mapping = page->mapping;
625 int ret = 0;
626 struct writeback_control wbc = {
627 .sync_mode = WB_SYNC_ALL,
628 .nr_to_write = 1,
629 };
630
631 BUG_ON(!PageLocked(page));
632
633 if (wait)
634 wait_on_page_writeback(page);
635
636 if (clear_page_dirty_for_io(page)) {
637 page_cache_get(page);
638 ret = mapping->a_ops->writepage(page, &wbc);
639 if (ret == 0 && wait) {
640 wait_on_page_writeback(page);
641 if (PageError(page))
642 ret = -EIO;
643 }
644 page_cache_release(page);
645 } else {
646 unlock_page(page);
647 }
648 return ret;
649}
650EXPORT_SYMBOL(write_one_page);
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667int __set_page_dirty_nobuffers(struct page *page)
668{
669 int ret = 0;
670
671 if (!TestSetPageDirty(page)) {
672 struct address_space *mapping = page_mapping(page);
673
674 if (mapping) {
675 spin_lock_irq(&mapping->tree_lock);
676 mapping = page_mapping(page);
677 if (page_mapping(page)) {
678 BUG_ON(page_mapping(page) != mapping);
679 if (!mapping->backing_dev_info->memory_backed) {
680 inc_page_state(nr_dirty);
681 task_io_account_write(PAGE_CACHE_SIZE);
682 }
683 radix_tree_tag_set(&mapping->page_tree,
684 page_index(page), PAGECACHE_TAG_DIRTY);
685 }
686 spin_unlock_irq(&mapping->tree_lock);
687 if (mapping->host) {
688
689 __mark_inode_dirty(mapping->host,
690 I_DIRTY_PAGES);
691 }
692 }
693 }
694 return ret;
695}
696EXPORT_SYMBOL(__set_page_dirty_nobuffers);
697
698
699
700
701
702
703int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page)
704{
705 wbc->pages_skipped++;
706 return __set_page_dirty_nobuffers(page);
707}
708EXPORT_SYMBOL(redirty_page_for_writepage);
709
710
711
712
713
714int fastcall set_page_dirty(struct page *page)
715{
716 struct address_space *mapping = page_mapping(page);
717
718 if (likely(mapping)) {
719 int (*spd)(struct page *) = mapping->a_ops->set_page_dirty;
720 if (spd)
721 return (*spd)(page);
722 return __set_page_dirty_buffers(page);
723 }
724 if (!PageDirty(page))
725 SetPageDirty(page);
726 return 0;
727}
728EXPORT_SYMBOL(set_page_dirty);
729
730
731
732
733
734
735
736
737
738
739
/*
 * Variant of set_page_dirty() for callers that do not hold the page lock:
 * takes the lock around the call and returns set_page_dirty()'s result.
 */
int set_page_dirty_lock(struct page *page)
{
	int dirtied;

	lock_page(page);
	dirtied = set_page_dirty(page);
	unlock_page(page);

	return dirtied;
}
EXPORT_SYMBOL(set_page_dirty_lock);
750
751
752
753
754
755int test_clear_page_dirty(struct page *page)
756{
757 struct address_space *mapping = page_mapping(page);
758 unsigned long flags;
759
760 if (mapping) {
761 spin_lock_irqsave(&mapping->tree_lock, flags);
762 if (TestClearPageDirty(page)) {
763 radix_tree_tag_clear(&mapping->page_tree,
764 page_index(page),
765 PAGECACHE_TAG_DIRTY);
766 spin_unlock_irqrestore(&mapping->tree_lock, flags);
767 if (!mapping->backing_dev_info->memory_backed)
768 dec_page_state(nr_dirty);
769 return 1;
770 }
771 spin_unlock_irqrestore(&mapping->tree_lock, flags);
772 return 0;
773 }
774 return TestClearPageDirty(page);
775}
776EXPORT_SYMBOL(test_clear_page_dirty);
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792int clear_page_dirty_for_io(struct page *page)
793{
794 struct address_space *mapping = page_mapping(page);
795
796 if (mapping) {
797 if (TestClearPageDirty(page)) {
798 if (!mapping->backing_dev_info->memory_backed)
799 dec_page_state(nr_dirty);
800 return 1;
801 }
802 return 0;
803 }
804 return TestClearPageDirty(page);
805}
806EXPORT_SYMBOL(clear_page_dirty_for_io);
807
808
809
810
811int __clear_page_dirty(struct page *page)
812{
813 struct address_space *mapping = page_mapping(page);
814
815 if (mapping) {
816 unsigned long flags;
817
818 spin_lock_irqsave(&mapping->tree_lock, flags);
819 if (TestClearPageDirty(page)) {
820 radix_tree_tag_clear(&mapping->page_tree,
821 page_index(page),
822 PAGECACHE_TAG_DIRTY);
823 spin_unlock_irqrestore(&mapping->tree_lock, flags);
824 return 1;
825 }
826 spin_unlock_irqrestore(&mapping->tree_lock, flags);
827 return 0;
828 }
829 return TestClearPageDirty(page);
830}
831
832int test_clear_page_writeback(struct page *page)
833{
834 struct address_space *mapping = page_mapping(page);
835 int ret;
836
837 if (mapping) {
838 unsigned long flags;
839
840 spin_lock_irqsave(&mapping->tree_lock, flags);
841 ret = TestClearPageWriteback(page);
842 if (ret)
843 radix_tree_tag_clear(&mapping->page_tree,
844 page_index(page),
845 PAGECACHE_TAG_WRITEBACK);
846 spin_unlock_irqrestore(&mapping->tree_lock, flags);
847 } else {
848 ret = TestClearPageWriteback(page);
849 }
850 return ret;
851}
852
853int test_set_page_writeback(struct page *page)
854{
855 struct address_space *mapping = page_mapping(page);
856 int ret;
857
858 if (mapping) {
859 unsigned long flags;
860
861 spin_lock_irqsave(&mapping->tree_lock, flags);
862 ret = TestSetPageWriteback(page);
863 if (!ret)
864 radix_tree_tag_set(&mapping->page_tree,
865 page_index(page),
866 PAGECACHE_TAG_WRITEBACK);
867 if (!PageDirty(page))
868 radix_tree_tag_clear(&mapping->page_tree,
869 page_index(page),
870 PAGECACHE_TAG_DIRTY);
871 spin_unlock_irqrestore(&mapping->tree_lock, flags);
872 } else {
873 ret = TestSetPageWriteback(page);
874 }
875 return ret;
876
877}
878EXPORT_SYMBOL(test_set_page_writeback);
879
880
881
882
883
884int mapping_tagged(struct address_space *mapping, int tag)
885{
886 unsigned long flags;
887 int ret;
888
889 spin_lock_irqsave(&mapping->tree_lock, flags);
890 ret = radix_tree_tagged(&mapping->page_tree, tag);
891 spin_unlock_irqrestore(&mapping->tree_lock, flags);
892 return ret;
893}
894EXPORT_SYMBOL(mapping_tagged);
895