1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21#include <linux/config.h>
22#include <linux/kernel.h>
23#include <linux/fs.h>
24#include <linux/mm.h>
25#include <linux/percpu.h>
26#include <linux/slab.h>
27#include <linux/smp_lock.h>
28#include <linux/blkdev.h>
29#include <linux/file.h>
30#include <linux/quotaops.h>
31#include <linux/highmem.h>
32#include <linux/module.h>
33#include <linux/writeback.h>
34#include <linux/hash.h>
35#include <linux/suspend.h>
36#include <linux/buffer_head.h>
37#include <linux/task_io_accounting_ops.h>
38#include <linux/bio.h>
39#include <linux/notifier.h>
40#include <linux/cpu.h>
41#include <asm/bitops.h>
42#include <linux/sysctl.h>
43#include <linux/gfp.h>
44
45
46int sysctl_drop_caches;
47
48static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
49static void invalidate_bh_lrus(void);
50
51#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
52
53struct bh_wait_queue {
54 struct buffer_head *bh;
55 wait_queue_t wait;
56};
57
58#define __DEFINE_BH_WAIT(name, b, f) \
59 struct bh_wait_queue name = { \
60 .bh = b, \
61 .wait = { \
62 .task = current, \
63 .flags = f, \
64 .func = bh_wake_function, \
65 .task_list = \
66 LIST_HEAD_INIT(name.wait.task_list),\
67 }, \
68 }
69#define DEFINE_BH_WAIT(name, bh) __DEFINE_BH_WAIT(name, bh, 0)
70#define DEFINE_BH_WAIT_EXCLUSIVE(name, bh) \
71 __DEFINE_BH_WAIT(name, bh, WQ_FLAG_EXCLUSIVE)
72
73
74
75
76#define BH_WAIT_TABLE_ORDER 7
77static struct bh_wait_queue_head {
78 wait_queue_head_t wqh;
79} ____cacheline_aligned_in_smp bh_wait_queue_heads[1<<BH_WAIT_TABLE_ORDER];
80
81inline void
82init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
83{
84 bh->b_end_io = handler;
85 bh->b_private = private;
86}
87
88
89
90
91
92wait_queue_head_t *bh_waitq_head(struct buffer_head *bh)
93{
94 return &bh_wait_queue_heads[hash_ptr(bh, BH_WAIT_TABLE_ORDER)].wqh;
95}
96EXPORT_SYMBOL(bh_waitq_head);
97
98void wake_up_buffer(struct buffer_head *bh)
99{
100 wait_queue_head_t *wq = bh_waitq_head(bh);
101
102 smp_mb();
103 if (waitqueue_active(wq))
104 __wake_up(wq, TASK_INTERRUPTIBLE|TASK_UNINTERRUPTIBLE, 1, bh);
105}
106EXPORT_SYMBOL(wake_up_buffer);
107
108static int bh_wake_function(wait_queue_t *wait, unsigned mode,
109 int sync, void *key)
110{
111 struct buffer_head *bh = key;
112 struct bh_wait_queue *wq;
113
114 wq = container_of(wait, struct bh_wait_queue, wait);
115 if (wq->bh != bh || buffer_locked(bh))
116 return 0;
117 else
118 return autoremove_wake_function(wait, mode, sync, key);
119}
120
121static void sync_buffer(struct buffer_head *bh)
122{
123 struct block_device *bd;
124
125 smp_mb();
126 bd = bh->b_bdev;
127 if (bd)
128 blk_run_address_space(bd->bd_inode->i_mapping);
129}
130
131void fastcall __lock_buffer(struct buffer_head *bh)
132{
133 wait_queue_head_t *wqh = bh_waitq_head(bh);
134 DEFINE_BH_WAIT_EXCLUSIVE(wait, bh);
135
136 do {
137 prepare_to_wait_exclusive(wqh, &wait.wait,
138 TASK_UNINTERRUPTIBLE);
139 if (buffer_locked(bh)) {
140 sync_buffer(bh);
141 io_schedule();
142 }
143 } while (test_set_buffer_locked(bh));
144 finish_wait(wqh, &wait.wait);
145}
146EXPORT_SYMBOL(__lock_buffer);
147
148void fastcall unlock_buffer(struct buffer_head *bh)
149{
150 smp_mb__before_clear_bit();
151 clear_buffer_locked(bh);
152 smp_mb__after_clear_bit();
153 wake_up_buffer(bh);
154}
155
156
157
158
159
160
161void __wait_on_buffer(struct buffer_head * bh)
162{
163 wait_queue_head_t *wqh = bh_waitq_head(bh);
164 DEFINE_BH_WAIT(wait, bh);
165
166 do {
167 prepare_to_wait(wqh, &wait.wait, TASK_UNINTERRUPTIBLE);
168 if (buffer_locked(bh)) {
169 sync_buffer(bh);
170 io_schedule();
171 }
172 } while (buffer_locked(bh));
173 finish_wait(wqh, &wait.wait);
174}
175
176static void
177__set_page_buffers(struct page *page, struct buffer_head *head)
178{
179 page_cache_get(page);
180 SetPagePrivate(page);
181 page->private = (unsigned long)head;
182}
183
184static void
185__clear_page_buffers(struct page *page)
186{
187 ClearPagePrivate(page);
188 page->private = 0;
189 page_cache_release(page);
190}
191
192static void buffer_io_error(struct buffer_head *bh)
193{
194 char b[BDEVNAME_SIZE];
195
196 printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n",
197 bdevname(bh->b_bdev, b),
198 (unsigned long long)bh->b_blocknr);
199}
200
201
202
203
204
/*
 * Completion handler for synchronous buffer reads.  Records the outcome
 * in the uptodate bit, unlocks @bh and drops one buffer reference.
 * Failures are not logged here.
 *
 * @bh:       buffer whose I/O finished
 * @uptodate: non-zero when the read succeeded
 */
void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
{
	if (uptodate)
		set_buffer_uptodate(bh);
	else
		clear_buffer_uptodate(bh);

	unlock_buffer(bh);
	put_bh(bh);
}
216
217void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
218{
219 char b[BDEVNAME_SIZE];
220
221 if (uptodate) {
222 set_buffer_uptodate(bh);
223 } else {
224 if (!buffer_eopnotsupp(bh) && printk_ratelimit()) {
225 buffer_io_error(bh);
226 printk(KERN_WARNING "lost page write due to "
227 "I/O error on %s\n",
228 bdevname(bh->b_bdev, b));
229 }
230 set_buffer_write_io_error(bh);
231 clear_buffer_uptodate(bh);
232 }
233 unlock_buffer(bh);
234 put_bh(bh);
235}
236
237
238
239
240
241int sync_blockdev(struct block_device *bdev)
242{
243 int ret = 0;
244
245 if (bdev) {
246 int err;
247
248 ret = filemap_fdatawrite(bdev->bd_inode->i_mapping);
249 err = filemap_fdatawait(bdev->bd_inode->i_mapping);
250 if (!ret)
251 ret = err;
252 }
253 return ret;
254}
255EXPORT_SYMBOL(sync_blockdev);
256
257
258
259
260
261
262int fsync_super(struct super_block *sb)
263{
264 sync_inodes_sb(sb, 0);
265 DQUOT_SYNC(sb);
266 lock_super(sb);
267 if (sb->s_dirt && sb->s_op->write_super)
268 sb->s_op->write_super(sb);
269 unlock_super(sb);
270 if (sb->s_op->sync_fs)
271 sb->s_op->sync_fs(sb, 1);
272 sync_blockdev(sb->s_bdev);
273 sync_inodes_sb(sb, 1);
274
275 return sync_blockdev(sb->s_bdev);
276}
277
278
279
280
281
282
/*
 * Sync @bdev.  When a superblock is found on the device the whole
 * filesystem is flushed through fsync_super(); otherwise only the
 * device's own pagecache is written via sync_blockdev().
 */
int fsync_bdev(struct block_device *bdev)
{
	struct super_block *sb = get_super(bdev);
	int res;

	if (!sb)
		return sync_blockdev(bdev);

	res = fsync_super(sb);
	drop_super(sb);
	return res;
}
293
294
295
296
297
298
299
300
301
302
303struct super_block *freeze_bdev(struct block_device *bdev)
304{
305 struct super_block *sb;
306
307 down(&bdev->bd_mount_sem);
308 sb = get_super(bdev);
309 if (sb && !(sb->s_flags & MS_RDONLY)) {
310 sb->s_frozen = SB_FREEZE_WRITE;
311 wmb();
312
313 sync_inodes_sb(sb, 0);
314 DQUOT_SYNC(sb);
315
316 lock_super(sb);
317 if (sb->s_dirt && sb->s_op->write_super)
318 sb->s_op->write_super(sb);
319 unlock_super(sb);
320
321 if (sb->s_op->sync_fs)
322 sb->s_op->sync_fs(sb, 1);
323
324 sync_blockdev(sb->s_bdev);
325 sync_inodes_sb(sb, 1);
326
327 sb->s_frozen = SB_FREEZE_TRANS;
328 wmb();
329
330 sync_blockdev(sb->s_bdev);
331
332 if (sb->s_op->write_super_lockfs)
333 sb->s_op->write_super_lockfs(sb);
334 }
335
336 sync_blockdev(bdev);
337 return sb;
338}
339EXPORT_SYMBOL(freeze_bdev);
340
341
342
343
344
345
346
347
348void thaw_bdev(struct block_device *bdev, struct super_block *sb)
349{
350 if (sb) {
351 BUG_ON(sb->s_bdev != bdev);
352
353 if (sb->s_op->unlockfs)
354 sb->s_op->unlockfs(sb);
355 sb->s_frozen = SB_UNFROZEN;
356 wmb();
357 wake_up(&sb->s_wait_unfrozen);
358 drop_super(sb);
359 }
360
361 up(&bdev->bd_mount_sem);
362}
363EXPORT_SYMBOL(thaw_bdev);
364
365
366
367
368
369static void do_sync(unsigned long wait)
370{
371 wakeup_bdflush(0);
372 sync_inodes(0);
373 DQUOT_SYNC(NULL);
374 sync_supers();
375 sync_filesystems(0);
376 sync_filesystems(wait);
377 sync_inodes(wait);
378 if (!wait)
379 printk("Emergency Sync complete\n");
380 if (unlikely(laptop_mode))
381 laptop_sync_completion();
382}
383
384asmlinkage long sys_sync(void)
385{
386 do_sync(1);
387 return 0;
388}
389
390void emergency_sync(void)
391{
392 pdflush_operation(do_sync, 0);
393}
394
395
396
397
398
399
400
401int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
402{
403 struct inode * inode = dentry->d_inode;
404 struct super_block * sb;
405 int ret, err;
406
407
408 ret = write_inode_now_err(inode, 0);
409
410
411 sb = inode->i_sb;
412 lock_super(sb);
413 if (sb->s_op->write_super)
414 sb->s_op->write_super(sb);
415 unlock_super(sb);
416
417
418 err = sync_blockdev(sb->s_bdev);
419 if (!ret)
420 ret = err;
421 return ret;
422}
423
424asmlinkage long sys_fsync(unsigned int fd)
425{
426 struct file * file;
427 struct address_space *mapping;
428 int ret, err;
429
430 ret = -EBADF;
431 file = fget(fd);
432 if (!file)
433 goto out;
434
435 mapping = file->f_mapping;
436
437 ret = -EINVAL;
438 if (!file->f_op || !file->f_op->fsync) {
439
440 goto out_putf;
441 }
442
443
444 down(&mapping->host->i_sem);
445 current->flags |= PF_SYNCWRITE;
446 ret = filemap_fdatawrite(mapping);
447 err = file->f_op->fsync(file, file->f_dentry, 0);
448 if (!ret)
449 ret = err;
450 err = filemap_fdatawait(mapping);
451 if (!ret)
452 ret = err;
453 current->flags &= ~PF_SYNCWRITE;
454 up(&mapping->host->i_sem);
455
456out_putf:
457 fput(file);
458out:
459 return ret;
460}
461
462asmlinkage long sys_fdatasync(unsigned int fd)
463{
464 struct file * file;
465 struct address_space *mapping;
466 int ret, err;
467
468 ret = -EBADF;
469 file = fget(fd);
470 if (!file)
471 goto out;
472
473 ret = -EINVAL;
474 if (!file->f_op || !file->f_op->fsync)
475 goto out_putf;
476
477 mapping = file->f_mapping;
478
479 down(&mapping->host->i_sem);
480 current->flags |= PF_SYNCWRITE;
481 ret = filemap_fdatawrite(mapping);
482 err = file->f_op->fsync(file, file->f_dentry, 1);
483 if (!ret)
484 ret = err;
485 err = filemap_fdatawait(mapping);
486 if (!ret)
487 ret = err;
488 current->flags &= ~PF_SYNCWRITE;
489 up(&mapping->host->i_sem);
490
491out_putf:
492 fput(file);
493out:
494 return ret;
495}
496
497
498
499
500
501
502
503
504
505
506
507
508static struct buffer_head *
509__find_get_block_slow(struct block_device *bdev, sector_t block, int unused)
510{
511 struct inode *bd_inode = bdev->bd_inode;
512 struct address_space *bd_mapping = bd_inode->i_mapping;
513 struct buffer_head *ret = NULL;
514 pgoff_t index;
515 struct buffer_head *bh;
516 struct buffer_head *head;
517 struct page *page;
518 int all_mapped = 1;
519
520 index = block >> (PAGE_CACHE_SHIFT - bd_inode->i_blkbits);
521 page = find_get_page(bd_mapping, index);
522 if (!page)
523 goto out;
524
525 spin_lock(&bd_mapping->private_lock);
526 if (!page_has_buffers(page))
527 goto out_unlock;
528 head = page_buffers(page);
529 bh = head;
530 do {
531 if (bh->b_blocknr == block) {
532 ret = bh;
533 get_bh(bh);
534 goto out_unlock;
535 }
536 if (!buffer_mapped(bh))
537 all_mapped = 0;
538 bh = bh->b_this_page;
539 } while (bh != head);
540
541
542
543
544
545
546 if (all_mapped) {
547 printk("__find_get_block_slow() failed. "
548 "block=%llu, b_blocknr=%llu\n",
549 (unsigned long long)block, (unsigned long long)bh->b_blocknr);
550 printk("b_state=0x%08lx, b_size=%u\n", bh->b_state, bh->b_size);
551 printk("device blocksize: %d\n", 1 << bd_inode->i_blkbits);
552 }
553out_unlock:
554 spin_unlock(&bd_mapping->private_lock);
555 page_cache_release(page);
556out:
557 return ret;
558}
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592void invalidate_bdev(struct block_device *bdev, int destroy_dirty_buffers)
593{
594 struct address_space *mapping = bdev->bd_inode->i_mapping;
595
596 if (mapping->nrpages == 0)
597 return;
598
599 invalidate_bh_lrus();
600
601
602
603
604
605 invalidate_inode_pages(mapping);
606}
607
608
609
610
611static void free_more_memory(void)
612{
613 struct zone **zones;
614 pg_data_t *pgdat;
615
616 wakeup_bdflush(1024);
617 yield();
618
619 for_each_pgdat(pgdat) {
620 zones = pgdat->node_zonelists[GFP_NOFS&GFP_ZONEMASK].zones;
621 if (*zones)
622 try_to_free_pages(zones, GFP_NOFS, 0, 1, ZONE_NORMAL);
623 }
624}
625
626
627
628
629
630static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
631{
632 static spinlock_t page_uptodate_lock = SPIN_LOCK_UNLOCKED;
633 unsigned long flags;
634 struct buffer_head *tmp;
635 struct page *page;
636 int page_uptodate = 1;
637
638 BUG_ON(!buffer_async_read(bh));
639
640 page = bh->b_page;
641 if (uptodate) {
642 set_buffer_uptodate(bh);
643 } else {
644 clear_buffer_uptodate(bh);
645 buffer_io_error(bh);
646 SetPageError(page);
647 }
648
649
650
651
652
653
654 spin_lock_irqsave(&page_uptodate_lock, flags);
655 clear_buffer_async_read(bh);
656 unlock_buffer(bh);
657 tmp = bh;
658 do {
659 if (!buffer_uptodate(tmp))
660 page_uptodate = 0;
661 if (buffer_async_read(tmp)) {
662 BUG_ON(!buffer_locked(tmp));
663 goto still_busy;
664 }
665 tmp = tmp->b_this_page;
666 } while (tmp != bh);
667 spin_unlock_irqrestore(&page_uptodate_lock, flags);
668
669
670
671
672
673 if (page_uptodate && !PageError(page))
674 SetPageUptodate(page);
675 unlock_page(page);
676 return;
677
678still_busy:
679 spin_unlock_irqrestore(&page_uptodate_lock, flags);
680 return;
681}
682
683
684
685
686
687void end_buffer_async_write(struct buffer_head *bh, int uptodate)
688{
689 char b[BDEVNAME_SIZE];
690 static spinlock_t page_uptodate_lock = SPIN_LOCK_UNLOCKED;
691 unsigned long flags;
692 struct buffer_head *tmp;
693 struct page *page;
694
695 BUG_ON(!buffer_async_write(bh));
696
697 page = bh->b_page;
698 if (uptodate) {
699 set_buffer_uptodate(bh);
700 } else {
701 if (printk_ratelimit()) {
702 buffer_io_error(bh);
703 printk(KERN_WARNING "lost page write due to "
704 "I/O error on %s\n",
705 bdevname(bh->b_bdev, b));
706 }
707 set_bit(AS_EIO, &page->mapping->flags);
708 clear_buffer_uptodate(bh);
709 SetPageError(page);
710 }
711
712 spin_lock_irqsave(&page_uptodate_lock, flags);
713 clear_buffer_async_write(bh);
714 unlock_buffer(bh);
715 tmp = bh->b_this_page;
716 while (tmp != bh) {
717 if (buffer_async_write(tmp)) {
718 BUG_ON(!buffer_locked(tmp));
719 goto still_busy;
720 }
721 tmp = tmp->b_this_page;
722 }
723 spin_unlock_irqrestore(&page_uptodate_lock, flags);
724 end_page_writeback(page);
725 return;
726
727still_busy:
728 spin_unlock_irqrestore(&page_uptodate_lock, flags);
729 return;
730}
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753static void mark_buffer_async_read(struct buffer_head *bh)
754{
755 bh->b_end_io = end_buffer_async_read;
756 set_buffer_async_read(bh);
757}
758
759void mark_buffer_async_write(struct buffer_head *bh)
760{
761 bh->b_end_io = end_buffer_async_write;
762 set_buffer_async_write(bh);
763}
764EXPORT_SYMBOL(mark_buffer_async_write);
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819static inline void __remove_assoc_queue(struct buffer_head *bh)
820{
821 list_del_init(&bh->b_assoc_buffers);
822}
823
824int inode_has_buffers(struct inode *inode)
825{
826 return !list_empty(&inode->i_data.private_list);
827}
828
829
830
831
832
833
834
835
836
837
838
839static int osync_buffers_list(spinlock_t *lock, struct list_head *list)
840{
841 struct buffer_head *bh;
842 struct list_head *p;
843 int err = 0;
844
845 spin_lock(lock);
846repeat:
847 list_for_each_prev(p, list) {
848 bh = BH_ENTRY(p);
849 if (buffer_locked(bh)) {
850 get_bh(bh);
851 spin_unlock(lock);
852 wait_on_buffer(bh);
853 if (!buffer_uptodate(bh))
854 err = -EIO;
855 brelse(bh);
856 spin_lock(lock);
857 goto repeat;
858 }
859 }
860 spin_unlock(lock);
861 return err;
862}
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877int sync_mapping_buffers(struct address_space *mapping)
878{
879 struct address_space *buffer_mapping = mapping->assoc_mapping;
880
881 if (buffer_mapping == NULL || list_empty(&mapping->private_list))
882 return 0;
883
884 return fsync_buffers_list(&buffer_mapping->private_lock,
885 &mapping->private_list);
886}
887EXPORT_SYMBOL(sync_mapping_buffers);
888
889
890
891
892
893
894
895void write_boundary_block(struct block_device *bdev,
896 sector_t bblock, unsigned blocksize)
897{
898 struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
899 if (bh) {
900 if (buffer_dirty(bh))
901 ll_rw_block(WRITE, 1, &bh);
902 put_bh(bh);
903 }
904}
905
906void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
907{
908 struct address_space *mapping = inode->i_mapping;
909 struct address_space *buffer_mapping = bh->b_page->mapping;
910
911 mark_buffer_dirty(bh);
912 if (!mapping->assoc_mapping) {
913 mapping->assoc_mapping = buffer_mapping;
914 } else {
915 if (mapping->assoc_mapping != buffer_mapping)
916 BUG();
917 }
918 if (list_empty(&bh->b_assoc_buffers)) {
919 spin_lock(&buffer_mapping->private_lock);
920 list_move_tail(&bh->b_assoc_buffers,
921 &mapping->private_list);
922 spin_unlock(&buffer_mapping->private_lock);
923 }
924}
925EXPORT_SYMBOL(mark_buffer_dirty_inode);
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952int __set_page_dirty_buffers(struct page *page)
953{
954 struct address_space * const mapping = page->mapping;
955
956 spin_lock(&mapping->private_lock);
957 if (page_has_buffers(page)) {
958 struct buffer_head *head = page_buffers(page);
959 struct buffer_head *bh = head;
960
961 do {
962 set_buffer_dirty(bh);
963 bh = bh->b_this_page;
964 } while (bh != head);
965 }
966 spin_unlock(&mapping->private_lock);
967
968 if (!TestSetPageDirty(page)) {
969 spin_lock_irq(&mapping->tree_lock);
970 if (page->mapping) {
971 if (!mapping->backing_dev_info->memory_backed) {
972 inc_page_state(nr_dirty);
973 task_io_account_write(PAGE_CACHE_SIZE);
974 }
975 radix_tree_tag_set(&mapping->page_tree,
976 page_index(page),
977 PAGECACHE_TAG_DIRTY);
978 }
979 spin_unlock_irq(&mapping->tree_lock);
980 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
981 }
982
983 return 0;
984}
985EXPORT_SYMBOL(__set_page_dirty_buffers);
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
1007{
1008 struct buffer_head *bh;
1009 struct list_head tmp;
1010 int err = 0, err2;
1011
1012 INIT_LIST_HEAD(&tmp);
1013
1014 spin_lock(lock);
1015 while (!list_empty(list)) {
1016 bh = BH_ENTRY(list->next);
1017 list_del_init(&bh->b_assoc_buffers);
1018 if (buffer_dirty(bh) || buffer_locked(bh)) {
1019 list_add(&bh->b_assoc_buffers, &tmp);
1020 if (buffer_dirty(bh)) {
1021 get_bh(bh);
1022 spin_unlock(lock);
1023
1024
1025
1026
1027
1028
1029 wait_on_buffer(bh);
1030 ll_rw_block(WRITE, 1, &bh);
1031 brelse(bh);
1032 spin_lock(lock);
1033 }
1034 }
1035 }
1036
1037 while (!list_empty(&tmp)) {
1038 bh = BH_ENTRY(tmp.prev);
1039 __remove_assoc_queue(bh);
1040 get_bh(bh);
1041 spin_unlock(lock);
1042 wait_on_buffer(bh);
1043 if (!buffer_uptodate(bh))
1044 err = -EIO;
1045 brelse(bh);
1046 spin_lock(lock);
1047 }
1048
1049 spin_unlock(lock);
1050 err2 = osync_buffers_list(lock, list);
1051 if (err)
1052 return err;
1053 else
1054 return err2;
1055}
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066void invalidate_inode_buffers(struct inode *inode)
1067{
1068 if (inode_has_buffers(inode)) {
1069 struct address_space *mapping = &inode->i_data;
1070 struct list_head *list = &mapping->private_list;
1071 struct address_space *buffer_mapping = mapping->assoc_mapping;
1072
1073 spin_lock(&buffer_mapping->private_lock);
1074 while (!list_empty(list))
1075 __remove_assoc_queue(BH_ENTRY(list->next));
1076 spin_unlock(&buffer_mapping->private_lock);
1077 }
1078}
1079
1080
1081
1082
1083
1084
1085
1086int remove_inode_buffers(struct inode *inode)
1087{
1088 int ret = 1;
1089
1090 if (inode_has_buffers(inode)) {
1091 struct address_space *mapping = &inode->i_data;
1092 struct list_head *list = &mapping->private_list;
1093 struct address_space *buffer_mapping = mapping->assoc_mapping;
1094
1095 spin_lock(&buffer_mapping->private_lock);
1096 while (!list_empty(list)) {
1097 struct buffer_head *bh = BH_ENTRY(list->next);
1098 if (buffer_dirty(bh)) {
1099 ret = 0;
1100 break;
1101 }
1102 __remove_assoc_queue(bh);
1103 }
1104 spin_unlock(&buffer_mapping->private_lock);
1105 }
1106 return ret;
1107}
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118static struct buffer_head *
1119create_buffers(struct page * page, unsigned long size, int retry)
1120{
1121 struct buffer_head *bh, *head;
1122 long offset;
1123
1124try_again:
1125 head = NULL;
1126 offset = PAGE_SIZE;
1127 while ((offset -= size) >= 0) {
1128 bh = alloc_buffer_head(GFP_NOFS);
1129 if (!bh)
1130 goto no_grow;
1131
1132 bh->b_bdev = NULL;
1133 bh->b_this_page = head;
1134 bh->b_blocknr = -1;
1135 head = bh;
1136
1137 bh->b_state = 0;
1138 atomic_set(&bh->b_count, 0);
1139 bh->b_size = size;
1140
1141
1142 set_bh_page(bh, page, offset);
1143
1144 bh->b_end_io = NULL;
1145 }
1146 return head;
1147
1148
1149
1150no_grow:
1151 if (head) {
1152 do {
1153 bh = head;
1154 head = head->b_this_page;
1155 free_buffer_head(bh);
1156 } while (head);
1157 }
1158
1159
1160
1161
1162
1163
1164
1165 if (!retry)
1166 return NULL;
1167
1168
1169
1170
1171
1172
1173
1174 free_more_memory();
1175 goto try_again;
1176}
1177
1178static inline void
1179link_dev_buffers(struct page *page, struct buffer_head *head)
1180{
1181 struct buffer_head *bh, *tail;
1182
1183 bh = head;
1184 do {
1185 tail = bh;
1186 bh = bh->b_this_page;
1187 } while (bh);
1188 tail->b_this_page = head;
1189 __set_page_buffers(page, head);
1190}
1191
1192
1193
1194
1195static void
1196init_page_buffers(struct page *page, struct block_device *bdev,
1197 sector_t block, int size)
1198{
1199 struct buffer_head *head = page_buffers(page);
1200 struct buffer_head *bh = head;
1201 int uptodate = PageUptodate(page);
1202
1203 do {
1204 if (!buffer_mapped(bh)) {
1205 init_buffer(bh, NULL, NULL);
1206 bh->b_bdev = bdev;
1207 bh->b_blocknr = block;
1208 if (uptodate)
1209 set_buffer_uptodate(bh);
1210 set_buffer_mapped(bh);
1211 }
1212 block++;
1213 bh = bh->b_this_page;
1214 } while (bh != head);
1215}
1216
1217
1218
1219
1220
1221
1222static struct page *
1223grow_dev_page(struct block_device *bdev, sector_t block,
1224 pgoff_t index, int size)
1225{
1226 struct inode *inode = bdev->bd_inode;
1227 struct page *page;
1228 struct buffer_head *bh;
1229
1230 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
1231 if (!page)
1232 return NULL;
1233
1234 if (!PageLocked(page))
1235 BUG();
1236
1237 if (page_has_buffers(page)) {
1238 bh = page_buffers(page);
1239 if (bh->b_size == size) {
1240 init_page_buffers(page, bdev, block, size);
1241 return page;
1242 }
1243 if (!try_to_free_buffers(page))
1244 goto failed;
1245 }
1246
1247
1248
1249
1250 bh = create_buffers(page, size, 0);
1251 if (!bh)
1252 goto failed;
1253
1254
1255
1256
1257
1258
1259 spin_lock(&inode->i_mapping->private_lock);
1260 link_dev_buffers(page, bh);
1261 init_page_buffers(page, bdev, block, size);
1262 spin_unlock(&inode->i_mapping->private_lock);
1263 return page;
1264
1265failed:
1266 BUG();
1267 unlock_page(page);
1268 page_cache_release(page);
1269 return NULL;
1270}
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281static inline int
1282grow_buffers(struct block_device *bdev, sector_t block, int size)
1283{
1284 struct page *page;
1285 pgoff_t index;
1286 int sizebits;
1287
1288 sizebits = -1;
1289 do {
1290 sizebits++;
1291 } while ((size << sizebits) < PAGE_SIZE);
1292
1293 index = block >> sizebits;
1294
1295
1296
1297
1298
1299 if (unlikely(index != block >> sizebits)) {
1300 char b[BDEVNAME_SIZE];
1301
1302 printk(KERN_ERR "%s: requested out-of-range block %llu for "
1303 "device %s\n",
1304 __FUNCTION__, (unsigned long long)block,
1305 bdevname(bdev, b));
1306 return -EIO;
1307 }
1308 block = index << sizebits;
1309
1310 page = grow_dev_page(bdev, block, index, size);
1311 if (!page)
1312 return 0;
1313 unlock_page(page);
1314 page_cache_release(page);
1315 return 1;
1316}
1317
1318struct buffer_head *
1319__getblk_slow(struct block_device *bdev, sector_t block, int size)
1320{
1321
1322 if (unlikely(size & (bdev_hardsect_size(bdev)-1) ||
1323 (size < 512 || size > PAGE_SIZE))) {
1324 printk(KERN_ERR "getblk(): invalid block size %d requested\n",
1325 size);
1326 printk(KERN_ERR "hardsect size: %d\n",
1327 bdev_hardsect_size(bdev));
1328
1329 dump_stack();
1330 return NULL;
1331 }
1332
1333 for (;;) {
1334 struct buffer_head * bh;
1335 int ret;
1336
1337 bh = __find_get_block(bdev, block, size);
1338 if (bh)
1339 return bh;
1340
1341 ret = grow_buffers(bdev, block, size);
1342 if (ret < 0)
1343 return NULL;
1344 if (ret == 0)
1345 free_more_memory();
1346 }
1347}
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383void fastcall mark_buffer_dirty(struct buffer_head *bh)
1384{
1385
1386
1387
1388
1389
1390
1391 if (buffer_dirty(bh)) {
1392 smp_mb();
1393 if (buffer_dirty(bh))
1394 return;
1395 }
1396
1397 if (!test_set_buffer_dirty(bh))
1398 __set_page_dirty_nobuffers(bh->b_page);
1399}
1400
1401
1402
1403
1404
1405
1406
1407
1408void __brelse(struct buffer_head * buf)
1409{
1410 if (atomic_read(&buf->b_count)) {
1411 put_bh(buf);
1412 return;
1413 }
1414 printk(KERN_ERR "VFS: brelse: Trying to free free buffer\n");
1415 WARN_ON(1);
1416}
1417
1418
1419
1420
1421
1422void __bforget(struct buffer_head *bh)
1423{
1424 clear_buffer_dirty(bh);
1425 if (!list_empty(&bh->b_assoc_buffers)) {
1426 struct address_space *buffer_mapping = bh->b_page->mapping;
1427
1428 spin_lock(&buffer_mapping->private_lock);
1429 list_del_init(&bh->b_assoc_buffers);
1430 spin_unlock(&buffer_mapping->private_lock);
1431 }
1432 __brelse(bh);
1433}
1434
1435static struct buffer_head *__bread_slow(struct buffer_head *bh)
1436{
1437 lock_buffer(bh);
1438 if (buffer_uptodate(bh)) {
1439 unlock_buffer(bh);
1440 return bh;
1441 } else {
1442 get_bh(bh);
1443 bh->b_end_io = end_buffer_read_sync;
1444 submit_bh(READ, bh);
1445 wait_on_buffer(bh);
1446 if (buffer_uptodate(bh))
1447 return bh;
1448 }
1449 brelse(bh);
1450 return NULL;
1451}
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467#define BH_LRU_SIZE 8
1468
1469struct bh_lru {
1470 struct buffer_head *bhs[BH_LRU_SIZE];
1471};
1472
1473static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }};
1474
1475#ifdef CONFIG_SMP
1476#define bh_lru_lock() local_irq_disable()
1477#define bh_lru_unlock() local_irq_enable()
1478#else
1479#define bh_lru_lock() preempt_disable()
1480#define bh_lru_unlock() preempt_enable()
1481#endif
1482
/*
 * Assert that interrupts are enabled.  Compiles to nothing when
 * irqs_disabled is not available as a macro.
 */
static inline void check_irqs_on(void)
{
#ifdef irqs_disabled
	BUG_ON(irqs_disabled());
#endif
}
1489
1490
1491
1492
1493static void bh_lru_install(struct buffer_head *bh)
1494{
1495 struct buffer_head *evictee = NULL;
1496 struct bh_lru *lru;
1497
1498 check_irqs_on();
1499 bh_lru_lock();
1500 lru = &__get_cpu_var(bh_lrus);
1501 if (lru->bhs[0] != bh) {
1502 struct buffer_head *bhs[BH_LRU_SIZE];
1503 int in;
1504 int out = 0;
1505
1506 get_bh(bh);
1507 bhs[out++] = bh;
1508 for (in = 0; in < BH_LRU_SIZE; in++) {
1509 struct buffer_head *bh2 = lru->bhs[in];
1510
1511 if (bh2 == bh) {
1512 __brelse(bh2);
1513 } else {
1514 if (out >= BH_LRU_SIZE) {
1515 BUG_ON(evictee != NULL);
1516 evictee = bh2;
1517 } else {
1518 bhs[out++] = bh2;
1519 }
1520 }
1521 }
1522 while (out < BH_LRU_SIZE)
1523 bhs[out++] = NULL;
1524 memcpy(lru->bhs, bhs, sizeof(bhs));
1525 }
1526 bh_lru_unlock();
1527
1528 if (evictee)
1529 __brelse(evictee);
1530}
1531
1532
1533
1534
1535static inline struct buffer_head *
1536lookup_bh_lru(struct block_device *bdev, sector_t block, int size)
1537{
1538 struct buffer_head *ret = NULL;
1539 struct bh_lru *lru;
1540 int i;
1541
1542 check_irqs_on();
1543 bh_lru_lock();
1544 lru = &__get_cpu_var(bh_lrus);
1545 for (i = 0; i < BH_LRU_SIZE; i++) {
1546 struct buffer_head *bh = lru->bhs[i];
1547
1548 if (bh && bh->b_bdev == bdev &&
1549 bh->b_blocknr == block && bh->b_size == size) {
1550 if (i) {
1551 while (i) {
1552 lru->bhs[i] = lru->bhs[i - 1];
1553 i--;
1554 }
1555 lru->bhs[0] = bh;
1556 }
1557 get_bh(bh);
1558 ret = bh;
1559 break;
1560 }
1561 }
1562 bh_lru_unlock();
1563 return ret;
1564}
1565
1566
1567
1568
1569
1570
1571struct buffer_head *
1572__find_get_block(struct block_device *bdev, sector_t block, int size)
1573{
1574 struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
1575
1576 if (bh == NULL) {
1577 bh = __find_get_block_slow(bdev, block, size);
1578 if (bh)
1579 bh_lru_install(bh);
1580 }
1581 if (bh)
1582 touch_buffer(bh);
1583 return bh;
1584}
1585EXPORT_SYMBOL(__find_get_block);
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599struct buffer_head *
1600__getblk(struct block_device *bdev, sector_t block, int size)
1601{
1602 struct buffer_head *bh = __find_get_block(bdev, block, size);
1603
1604 might_sleep();
1605 if (bh == NULL)
1606 bh = __getblk_slow(bdev, block, size);
1607 return bh;
1608}
1609EXPORT_SYMBOL(__getblk);
1610
1611
1612
1613
1614void __breadahead(struct block_device *bdev, sector_t block, int size)
1615{
1616 struct buffer_head *bh = __getblk(bdev, block, size);
1617 if (likely(bh)) {
1618 ll_rw_block(READA, 1, &bh);
1619 brelse(bh);
1620 }
1621}
1622EXPORT_SYMBOL(__breadahead);
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632struct buffer_head *
1633__bread(struct block_device *bdev, sector_t block, int size)
1634{
1635 struct buffer_head *bh = __getblk(bdev, block, size);
1636
1637 if (likely(bh) && !buffer_uptodate(bh))
1638 bh = __bread_slow(bh);
1639 return bh;
1640}
1641EXPORT_SYMBOL(__bread);
1642
1643
1644
1645
1646
1647
1648static void invalidate_bh_lru(void *arg)
1649{
1650 struct bh_lru *b = &get_cpu_var(bh_lrus);
1651 int i;
1652
1653 for (i = 0; i < BH_LRU_SIZE; i++) {
1654 brelse(b->bhs[i]);
1655 b->bhs[i] = NULL;
1656 }
1657 put_cpu_var(bh_lrus);
1658}
1659
1660static void invalidate_bh_lrus(void)
1661{
1662 on_each_cpu(invalidate_bh_lru, NULL, 1, 1);
1663}
1664
1665void set_bh_page(struct buffer_head *bh,
1666 struct page *page, unsigned long offset)
1667{
1668 bh->b_page = page;
1669 if (offset >= PAGE_SIZE)
1670 BUG();
1671 if (PageHighMem(page))
1672
1673
1674
1675 bh->b_data = (char *)(0 + offset);
1676 else
1677 bh->b_data = page_address(page) + offset;
1678}
1679EXPORT_SYMBOL(set_bh_page);
1680
1681
1682
1683
1684static inline void discard_buffer(struct buffer_head * bh)
1685{
1686 lock_buffer(bh);
1687 clear_buffer_dirty(bh);
1688 bh->b_bdev = NULL;
1689 clear_buffer_mapped(bh);
1690 clear_buffer_req(bh);
1691 clear_buffer_new(bh);
1692 clear_buffer_delay(bh);
1693 unlock_buffer(bh);
1694}
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711int try_to_release_page(struct page *page, int gfp_mask)
1712{
1713 struct address_space * const mapping = page->mapping;
1714
1715 BUG_ON(!PageLocked(page));
1716 if (PageWriteback(page))
1717 return 0;
1718
1719 if (mapping && mapping->a_ops->releasepage)
1720 return mapping->a_ops->releasepage(page, gfp_mask);
1721 return try_to_free_buffers(page);
1722}
1723EXPORT_SYMBOL(try_to_release_page);
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740int block_invalidatepage(struct page *page, unsigned long offset)
1741{
1742 struct buffer_head *head, *bh, *next;
1743 unsigned int curr_off = 0;
1744 int ret = 1;
1745
1746 BUG_ON(!PageLocked(page));
1747 if (!page_has_buffers(page))
1748 goto out;
1749
1750 head = page_buffers(page);
1751 bh = head;
1752 do {
1753 unsigned int next_off = curr_off + bh->b_size;
1754 next = bh->b_this_page;
1755
1756
1757
1758
1759 if (offset <= curr_off)
1760 discard_buffer(bh);
1761 curr_off = next_off;
1762 bh = next;
1763 } while (bh != head);
1764
1765
1766
1767
1768
1769
1770 if (offset == 0)
1771 ret = try_to_release_page(page, 0);
1772out:
1773 return ret;
1774}
1775EXPORT_SYMBOL(block_invalidatepage);
1776
1777
1778
1779
1780
1781
1782void create_empty_buffers(struct page *page,
1783 unsigned long blocksize, unsigned long b_state)
1784{
1785 struct buffer_head *bh, *head, *tail;
1786
1787 head = create_buffers(page, blocksize, 1);
1788 bh = head;
1789 do {
1790 bh->b_state |= b_state;
1791 tail = bh;
1792 bh = bh->b_this_page;
1793 } while (bh);
1794 tail->b_this_page = head;
1795
1796 spin_lock(&page->mapping->private_lock);
1797 if (PageUptodate(page) || PageDirty(page)) {
1798 bh = head;
1799 do {
1800 if (PageDirty(page))
1801 set_buffer_dirty(bh);
1802 if (PageUptodate(page))
1803 set_buffer_uptodate(bh);
1804 bh = bh->b_this_page;
1805 } while (bh != head);
1806 }
1807 __set_page_buffers(page, head);
1808 spin_unlock(&page->mapping->private_lock);
1809}
1810EXPORT_SYMBOL(create_empty_buffers);
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
1829{
1830 struct buffer_head *old_bh;
1831
1832 might_sleep();
1833
1834 old_bh = __find_get_block_slow(bdev, block, 0);
1835 if (old_bh) {
1836 clear_buffer_dirty(old_bh);
1837 wait_on_buffer(old_bh);
1838 clear_buffer_req(old_bh);
1839 __brelse(old_bh);
1840 }
1841}
1842EXPORT_SYMBOL(unmap_underlying_metadata);
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869static int __block_write_full_page(struct inode *inode, struct page *page,
1870 get_block_t *get_block, struct writeback_control *wbc)
1871{
1872 int err;
1873 sector_t block;
1874 sector_t last_block;
1875 struct buffer_head *bh, *head;
1876 int nr_underway = 0;
1877
1878 BUG_ON(!PageLocked(page));
1879
1880 last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
1881
1882 if (!page_has_buffers(page)) {
1883 create_empty_buffers(page, 1 << inode->i_blkbits,
1884 (1 << BH_Dirty)|(1 << BH_Uptodate));
1885 }
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897 block = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1898 head = page_buffers(page);
1899 bh = head;
1900
1901
1902
1903
1904
1905 do {
1906 if (block > last_block) {
1907
1908
1909
1910
1911
1912
1913
1914
1915 clear_buffer_dirty(bh);
1916 set_buffer_uptodate(bh);
1917 } else if (!buffer_mapped(bh) && buffer_dirty(bh)) {
1918 err = get_block(inode, block, bh, 1);
1919 if (err)
1920 goto recover;
1921 if (buffer_new(bh)) {
1922
1923 clear_buffer_new(bh);
1924 unmap_underlying_metadata(bh->b_bdev,
1925 bh->b_blocknr);
1926 }
1927 }
1928 bh = bh->b_this_page;
1929 block++;
1930 } while (bh != head);
1931
1932 do {
1933 get_bh(bh);
1934 if (!buffer_mapped(bh))
1935 continue;
1936
1937
1938
1939
1940
1941
1942
1943 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
1944 lock_buffer(bh);
1945 } else if (test_set_buffer_locked(bh)) {
1946 redirty_page_for_writepage(wbc, page);
1947 continue;
1948 }
1949 if (test_clear_buffer_dirty(bh)) {
1950 mark_buffer_async_write(bh);
1951 } else {
1952 unlock_buffer(bh);
1953 }
1954 } while ((bh = bh->b_this_page) != head);
1955
1956
1957
1958
1959
1960 BUG_ON(PageWriteback(page));
1961 set_page_writeback(page);
1962 unlock_page(page);
1963
1964 do {
1965 struct buffer_head *next = bh->b_this_page;
1966 if (buffer_async_write(bh)) {
1967 submit_bh(WRITE, bh);
1968 nr_underway++;
1969 }
1970 put_bh(bh);
1971 bh = next;
1972 } while (bh != head);
1973
1974 err = 0;
1975done:
1976 if (nr_underway == 0) {
1977
1978
1979
1980
1981
1982 int uptodate = 1;
1983 do {
1984 if (!buffer_uptodate(bh)) {
1985 uptodate = 0;
1986 break;
1987 }
1988 bh = bh->b_this_page;
1989 } while (bh != head);
1990 if (uptodate)
1991 SetPageUptodate(page);
1992 end_page_writeback(page);
1993
1994
1995
1996
1997 wbc->pages_skipped++;
1998 }
1999 return err;
2000
2001recover:
2002
2003
2004
2005
2006
2007
2008 bh = head;
2009
2010 do {
2011 get_bh(bh);
2012 if (buffer_mapped(bh) && buffer_dirty(bh)) {
2013 lock_buffer(bh);
2014 mark_buffer_async_write(bh);
2015 } else {
2016
2017
2018
2019
2020 clear_buffer_dirty(bh);
2021 }
2022 } while ((bh = bh->b_this_page) != head);
2023 SetPageError(page);
2024 BUG_ON(PageWriteback(page));
2025 set_page_writeback(page);
2026 unlock_page(page);
2027 do {
2028 struct buffer_head *next = bh->b_this_page;
2029 if (buffer_async_write(bh)) {
2030 clear_buffer_dirty(bh);
2031 submit_bh(WRITE, bh);
2032 nr_underway++;
2033 }
2034 put_bh(bh);
2035 bh = next;
2036 } while (bh != head);
2037 goto done;
2038}
2039
2040static int __block_prepare_write(struct inode *inode, struct page *page,
2041 unsigned from, unsigned to, get_block_t *get_block)
2042{
2043 unsigned block_start, block_end;
2044 sector_t block;
2045 int err = 0;
2046 unsigned blocksize, bbits;
2047 struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
2048
2049 BUG_ON(!PageLocked(page));
2050 BUG_ON(from > PAGE_CACHE_SIZE);
2051 BUG_ON(to > PAGE_CACHE_SIZE);
2052 BUG_ON(from > to);
2053
2054 blocksize = 1 << inode->i_blkbits;
2055 if (!page_has_buffers(page))
2056 create_empty_buffers(page, blocksize, 0);
2057 head = page_buffers(page);
2058
2059 bbits = inode->i_blkbits;
2060 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
2061
2062 for(bh = head, block_start = 0; bh != head || !block_start;
2063 block++, block_start=block_end, bh = bh->b_this_page) {
2064 block_end = block_start + blocksize;
2065 if (block_end <= from || block_start >= to) {
2066 if (PageUptodate(page)) {
2067 if (!buffer_uptodate(bh))
2068 set_buffer_uptodate(bh);
2069 }
2070 continue;
2071 }
2072 if (buffer_new(bh))
2073 clear_buffer_new(bh);
2074 if (!buffer_mapped(bh)) {
2075 err = get_block(inode, block, bh, 1);
2076 if (err)
2077 goto out;
2078 if (buffer_new(bh)) {
2079 unmap_underlying_metadata(bh->b_bdev,
2080 bh->b_blocknr);
2081 if (PageUptodate(page)) {
2082 set_buffer_uptodate(bh);
2083 continue;
2084 }
2085 if (block_end > to || block_start < from) {
2086 void *kaddr;
2087
2088 kaddr = kmap_atomic(page, KM_USER0);
2089 if (block_end > to)
2090 memset(kaddr+to, 0,
2091 block_end-to);
2092 if (block_start < from)
2093 memset(kaddr+block_start,
2094 0, from-block_start);
2095 flush_dcache_page(page);
2096 kunmap_atomic(kaddr, KM_USER0);
2097 }
2098 continue;
2099 }
2100 }
2101 if (PageUptodate(page)) {
2102 if (!buffer_uptodate(bh))
2103 set_buffer_uptodate(bh);
2104 continue;
2105 }
2106 if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
2107 (block_start < from || block_end > to)) {
2108 ll_rw_block(READ, 1, &bh);
2109 *wait_bh++=bh;
2110 }
2111 }
2112
2113
2114
2115 while(wait_bh > wait) {
2116 wait_on_buffer(*--wait_bh);
2117 if (!buffer_uptodate(*wait_bh))
2118 return -EIO;
2119 }
2120
2121 bh = head;
2122 do {
2123 if (buffer_new(bh))
2124 clear_buffer_new(bh);
2125 } while ((bh = bh->b_this_page) != head);
2126
2127 return 0;
2128out:
2129
2130
2131
2132
2133
2134 bh = head;
2135 block_start = 0;
2136 do {
2137 block_end = block_start+blocksize;
2138 if (block_end <= from)
2139 goto next_bh;
2140 if (block_start >= to)
2141 break;
2142 if (buffer_new(bh)) {
2143 void *kaddr;
2144
2145 clear_buffer_new(bh);
2146 kaddr = kmap_atomic(page, KM_USER0);
2147 memset(kaddr+block_start, 0, bh->b_size);
2148 kunmap_atomic(kaddr, KM_USER0);
2149 set_buffer_uptodate(bh);
2150 mark_buffer_dirty(bh);
2151 }
2152next_bh:
2153 block_start = block_end;
2154 bh = bh->b_this_page;
2155 } while (bh != head);
2156 return err;
2157}
2158
2159static int __block_commit_write(struct inode *inode, struct page *page,
2160 unsigned from, unsigned to)
2161{
2162 unsigned block_start, block_end;
2163 int partial = 0;
2164 unsigned blocksize;
2165 struct buffer_head *bh, *head;
2166
2167 blocksize = 1 << inode->i_blkbits;
2168
2169 for(bh = head = page_buffers(page), block_start = 0;
2170 bh != head || !block_start;
2171 block_start=block_end, bh = bh->b_this_page) {
2172 block_end = block_start + blocksize;
2173 if (block_end <= from || block_start >= to) {
2174 if (!buffer_uptodate(bh))
2175 partial = 1;
2176 } else {
2177 set_buffer_uptodate(bh);
2178 mark_buffer_dirty(bh);
2179 }
2180 }
2181
2182
2183
2184
2185
2186
2187
2188 if (!partial)
2189 SetPageUptodate(page);
2190 return 0;
2191}
2192
2193
2194
2195
2196
2197
2198
2199
/*
 * block_read_full_page - read in every buffer of a page that needs it
 * @page: locked page (PAGE_BUG if it is not locked)
 * @get_block: filesystem callback used to map unmapped buffers (create=0)
 *
 * Buffers that are already uptodate are skipped.  Buffers that remain
 * unmapped after the lookup have their part of the page zero-filled.
 * All remaining buffers are collected, locked, and submitted for
 * asynchronous read.
 *
 * Always returns 0.  A get_block() failure is recorded with
 * SetPageError().  If no buffer needs I/O the page is unlocked here.
 */
int block_read_full_page(struct page *page, get_block_t *get_block)
{
	struct inode *inode = page->mapping->host;
	sector_t iblock, lblock;
	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
	unsigned int blocksize;
	int nr, i;
	int fully_mapped = 1;

	if (!PageLocked(page))
		PAGE_BUG(page);
	blocksize = 1 << inode->i_blkbits;
	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);
	head = page_buffers(page);

	/* iblock: first block of this page; lblock: i_size rounded up, in blocks */
	iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
	lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits;
	bh = head;
	nr = 0;
	i = 0;

	/*
	 * Note: "continue" inside a do/while jumps to the loop condition, so
	 * i, iblock and bh still advance for skipped buffers.
	 */
	do {
		if (buffer_uptodate(bh))
			continue;

		if (!buffer_mapped(bh)) {
			int err = 0;

			fully_mapped = 0;
			/* Only blocks below lblock are looked up. */
			if (iblock < lblock) {
				err = get_block(inode, iblock, bh, 0);
				if (err)
					SetPageError(page);
			}
			if (!buffer_mapped(bh)) {
				/* Still unmapped: zero this block's slice of the page. */
				void *kaddr = kmap_atomic(page, KM_USER0);
				memset(kaddr + i * blocksize, 0, blocksize);
				flush_dcache_page(page);
				kunmap_atomic(kaddr, KM_USER0);
				/* Not marked uptodate if the lookup itself failed. */
				if (!err)
					set_buffer_uptodate(bh);
				continue;
			}
			/*
			 * get_block() may itself have marked the buffer
			 * uptodate; no read is needed then.
			 */
			if (buffer_uptodate(bh))
				continue;
		}
		arr[nr++] = bh;
	} while (i++, iblock++, (bh = bh->b_this_page) != head);

	/* Set only if no buffer was found unmapped on entry to the loop body. */
	if (fully_mapped)
		SetPageMappedToDisk(page);

	if (!nr) {
		/*
		 * Nothing to read.  The page becomes uptodate unless an
		 * error was recorded on it, and is unlocked right here.
		 */
		if (!PageError(page))
			SetPageUptodate(page);
		unlock_page(page);
		return 0;
	}

	/* Stage two: lock all collected buffers before any I/O starts. */
	for (i = 0; i < nr; i++) {
		bh = arr[i];
		lock_buffer(bh);
		mark_buffer_async_read(bh);
	}

	/*
	 * Stage three: start the I/O.  A buffer found uptodate at this point
	 * is completed directly through end_buffer_async_read() instead of
	 * being submitted.
	 */
	for (i = 0; i < nr; i++) {
		bh = arr[i];
		if (buffer_uptodate(bh))
			end_buffer_async_read(bh, 1);
		else
			submit_bh(READ, bh);
	}
	return 0;
}
2289
2290
2291
2292
2293
2294int generic_cont_expand(struct inode *inode, loff_t size)
2295{
2296 struct address_space *mapping = inode->i_mapping;
2297 struct page *page;
2298 unsigned long index, offset, limit;
2299 int err;
2300
2301 err = -EFBIG;
2302 limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
2303 if (limit != RLIM_INFINITY && size > (loff_t)limit) {
2304 send_sig(SIGXFSZ, current, 0);
2305 goto out;
2306 }
2307 if (size > inode->i_sb->s_maxbytes)
2308 goto out;
2309
2310 offset = (size & (PAGE_CACHE_SIZE-1));
2311
2312
2313
2314
2315
2316 if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
2317 offset++;
2318 }
2319 index = size >> PAGE_CACHE_SHIFT;
2320 err = -ENOMEM;
2321 page = grab_cache_page(mapping, index);
2322 if (!page)
2323 goto out;
2324 err = mapping->a_ops->prepare_write(NULL, page, offset, offset);
2325 if (!err) {
2326 err = mapping->a_ops->commit_write(NULL, page, offset, offset);
2327 }
2328 unlock_page(page);
2329 page_cache_release(page);
2330 if (err > 0)
2331 err = 0;
2332out:
2333 return err;
2334}
2335
2336
2337
2338
2339
2340
/*
 * cont_prepare_write - prepare_write variant that zero-fills up to the write
 * @page: page about to be written
 * @offset: first byte in @page that will be written
 * @to: one past the last byte in @page that will be written
 * @get_block: filesystem block-mapping callback
 * @bytes: in/out byte position up to which the file has been filled so far;
 *         it is rounded up to the next block boundary as blocks are zeroed
 *         (NOTE(review): the exact meaning of *bytes is defined by the
 *         caller - confirm against the filesystem using this helper)
 *
 * Pages that lie between *bytes and @page are zeroed from *bytes to their
 * end and committed.  Then @page itself is prepared, and any gap between
 * the fill position and @offset inside it is zeroed and committed.
 *
 * Returns 0 on success, -ENOMEM if an intermediate page cannot be obtained,
 * or the error from __block_prepare_write().
 */
int cont_prepare_write(struct page *page, unsigned offset,
		unsigned to, get_block_t *get_block, loff_t *bytes)
{
	struct address_space *mapping = page->mapping;
	struct inode *inode = mapping->host;
	struct page *new_page;
	pgoff_t pgpos;
	long status;
	unsigned zerofrom;
	unsigned blocksize = 1 << inode->i_blkbits;
	void *kaddr;

	/* Zero-fill every page that lies before the target page. */
	while(page->index > (pgpos = *bytes>>PAGE_CACHE_SHIFT)) {
		status = -ENOMEM;
		new_page = grab_cache_page(mapping, pgpos);
		if (!new_page)
			goto out;
		/*
		 * *bytes is re-checked after grabbing the page; if it has
		 * moved to a different page in the meantime, drop this page
		 * and start over.
		 */
		if (*bytes>>PAGE_CACHE_SHIFT != pgpos) {
			unlock_page(new_page);
			page_cache_release(new_page);
			continue;
		}
		zerofrom = *bytes & ~PAGE_CACHE_MASK;
		/* Mid-block position: advance *bytes to the next block boundary. */
		if (zerofrom & (blocksize-1)) {
			*bytes |= (blocksize-1);
			(*bytes)++;
		}
		status = __block_prepare_write(inode, new_page, zerofrom,
						PAGE_CACHE_SIZE, get_block);
		if (status)
			goto out_unmap;
		kaddr = kmap_atomic(new_page, KM_USER0);
		memset(kaddr+zerofrom, 0, PAGE_CACHE_SIZE-zerofrom);
		flush_dcache_page(new_page);
		kunmap_atomic(kaddr, KM_USER0);
		__block_commit_write(inode, new_page,
				zerofrom, PAGE_CACHE_SIZE);
		unlock_page(new_page);
		page_cache_release(new_page);
	}

	if (page->index < pgpos) {
		/* The page lies entirely below *bytes: nothing extra to zero. */
		zerofrom = offset;
	} else {
		/* *bytes falls inside this page: locate it within the page. */
		zerofrom = *bytes & ~PAGE_CACHE_MASK;

		/*
		 * If the write extends past a mid-block *bytes, advance
		 * *bytes to the next block boundary.
		 */
		if (to > zerofrom && (zerofrom & (blocksize-1))) {
			*bytes |= (blocksize-1);
			(*bytes)++;
		}

		/* Write starts at or below that position: no gap to zero. */
		if (offset <= zerofrom)
			zerofrom = offset;
	}
	status = __block_prepare_write(inode, page, zerofrom, to, get_block);
	if (status)
		goto out1;
	/* Zero and commit the gap between the fill position and the write. */
	if (zerofrom < offset) {
		kaddr = kmap_atomic(page, KM_USER0);
		memset(kaddr+zerofrom, 0, offset-zerofrom);
		flush_dcache_page(page);
		kunmap_atomic(kaddr, KM_USER0);
		__block_commit_write(inode, page, zerofrom, offset);
	}
	return 0;
out1:
	/* The caller's page is left locked; only its uptodate flag is cleared. */
	ClearPageUptodate(page);
	return status;

out_unmap:
	/* Failure on an intermediate page: release it before returning. */
	ClearPageUptodate(new_page);
	unlock_page(new_page);
	page_cache_release(new_page);
out:
	return status;
}
2422
2423int block_prepare_write(struct page *page, unsigned from, unsigned to,
2424 get_block_t *get_block)
2425{
2426 struct inode *inode = page->mapping->host;
2427 int err = __block_prepare_write(inode, page, from, to, get_block);
2428 if (err)
2429 ClearPageUptodate(page);
2430 return err;
2431}
2432
2433int block_commit_write(struct page *page, unsigned from, unsigned to)
2434{
2435 struct inode *inode = page->mapping->host;
2436 __block_commit_write(inode,page,from,to);
2437 return 0;
2438}
2439
2440int generic_commit_write(struct file *file, struct page *page,
2441 unsigned from, unsigned to)
2442{
2443 struct inode *inode = page->mapping->host;
2444 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
2445 __block_commit_write(inode,page,from,to);
2446
2447
2448
2449
2450 if (pos > inode->i_size) {
2451 i_size_write(inode, pos);
2452 mark_inode_dirty(inode);
2453 }
2454 return 0;
2455}
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
/*
 * I/O completion handler installed by nobh_prepare_write() on its
 * temporary read buffers: record the outcome in the buffer's uptodate bit
 * and release the buffer lock.
 */
static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
{
	if (!uptodate)
		clear_buffer_uptodate(bh);	/* the read failed */
	else
		set_buffer_uptodate(bh);

	unlock_buffer(bh);
}
2478
2479
2480
2481
2482
/*
 * nobh_prepare_write - prepare_write that leaves no buffers on the page
 * @page: page about to be written
 * @from: first byte that will be written
 * @to: one past the last byte that will be written
 * @get_block: filesystem block-mapping callback
 *
 * Maps every block of the page using an on-stack buffer_head, zeroes the
 * unwritten parts of new or unmapped blocks, and reads partially
 * overwritten blocks through temporary buffer_heads that are freed again
 * before returning.
 *
 * Returns 0 on success (immediately so if the page is already flagged
 * mapped-to-disk), the error from @get_block, -ENOMEM, or -EIO.  On any
 * failure the whole page is zeroed, marked uptodate and dirtied.
 */
int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
			get_block_t *get_block)
{
	struct inode *inode = page->mapping->host;
	const unsigned blkbits = inode->i_blkbits;
	const unsigned blocksize = 1 << blkbits;
	struct buffer_head map_bh;
	struct buffer_head *read_bh[MAX_BUF_PER_PAGE];
	unsigned block_in_page;
	unsigned block_start;
	sector_t block_in_file;
	char *kaddr;
	int nr_reads = 0;
	int i;
	int ret = 0;
	int is_mapped_to_disk = 1;
	int dirtied_it = 0;

	if (PageMappedToDisk(page))
		return 0;

	block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
	map_bh.b_page = page;

	/*
	 * Walk every block of the page, not just those in [from, to).
	 * Blocks starting at or beyond @to are looked up with create == 0;
	 * all others with create == 1.
	 */
	for (block_start = 0, block_in_page = 0;
		  block_start < PAGE_CACHE_SIZE;
		  block_in_page++, block_start += blocksize) {
		unsigned block_end = block_start + blocksize;
		int create;

		/* Reset the scratch buffer's state for each lookup. */
		map_bh.b_state = 0;
		create = 1;
		if (block_start >= to)
			create = 0;
		ret = get_block(inode, block_in_file + block_in_page,
					&map_bh, create);
		if (ret)
			goto failed;
		if (!buffer_mapped(&map_bh))
			is_mapped_to_disk = 0;
		if (buffer_new(&map_bh))
			unmap_underlying_metadata(map_bh.b_bdev,
							map_bh.b_blocknr);
		if (PageUptodate(page))
			continue;
		if (buffer_new(&map_bh) || !buffer_mapped(&map_bh)) {
			/*
			 * Nothing on disk worth reading: zero whatever part
			 * of the block lies outside [from, to).
			 */
			kaddr = kmap_atomic(page, KM_USER0);
			if (block_start < from) {
				memset(kaddr+block_start, 0, from-block_start);
				dirtied_it = 1;
			}
			if (block_end > to) {
				memset(kaddr + to, 0, block_end - to);
				dirtied_it = 1;
			}
			flush_dcache_page(page);
			kunmap_atomic(kaddr, KM_USER0);
			continue;
		}
		/* get_block() may report the block as already uptodate. */
		if (buffer_uptodate(&map_bh))
			continue;
		if (block_start < from || block_end > to) {
			/*
			 * Block is not wholly covered by [from, to): queue a
			 * read through a temporary buffer_head that copies
			 * the mapping from map_bh.
			 */
			struct buffer_head *bh = alloc_buffer_head(GFP_NOFS);

			if (!bh) {
				ret = -ENOMEM;
				goto failed;
			}
			bh->b_state = map_bh.b_state;
			atomic_set(&bh->b_count, 0);
			bh->b_this_page = NULL;
			bh->b_page = page;
			bh->b_blocknr = map_bh.b_blocknr;
			bh->b_size = blocksize;
			/* b_data holds the offset within the page, not an address. */
			bh->b_data = (char *)(long)block_start;
			bh->b_bdev = map_bh.b_bdev;
			bh->b_private = NULL;
			read_bh[nr_reads++] = bh;
		}
	}

	if (nr_reads) {
		struct buffer_head *bh;

		/*
		 * Submit all queued reads first, then wait for each.  The
		 * buffer_heads are private to this function and are freed
		 * as soon as their read has completed.
		 */
		for (i = 0; i < nr_reads; i++) {
			bh = read_bh[i];
			lock_buffer(bh);
			bh->b_end_io = end_buffer_read_nobh;
			submit_bh(READ, bh);
		}
		for (i = 0; i < nr_reads; i++) {
			bh = read_bh[i];
			wait_on_buffer(bh);
			if (!buffer_uptodate(bh))
				ret = -EIO;
			free_buffer_head(bh);
			/* NULL the slot so the failure path won't free it twice. */
			read_bh[i] = NULL;
		}
		if (ret)
			goto failed;
	}

	if (is_mapped_to_disk)
		SetPageMappedToDisk(page);
	SetPageUptodate(page);

	/*
	 * The page is dirtied here only if this function itself zeroed part
	 * of it.
	 */
	if (dirtied_it)
		set_page_dirty(page);

	return 0;

failed:
	/* Free any temporary buffer_heads that are still allocated. */
	for (i = 0; i < nr_reads; i++) {
		if (read_bh[i])
			free_buffer_head(read_bh[i]);
	}

	/*
	 * Coarse error recovery: zero the whole page and mark it uptodate
	 * and dirty.
	 */
	kaddr = kmap_atomic(page, KM_USER0);
	memset(kaddr, 0, PAGE_CACHE_SIZE);
	kunmap_atomic(kaddr, KM_USER0);
	SetPageUptodate(page);
	set_page_dirty(page);
	return ret;
}
EXPORT_SYMBOL(nobh_prepare_write);
2629
2630int nobh_commit_write(struct file *file, struct page *page,
2631 unsigned from, unsigned to)
2632{
2633 struct inode *inode = page->mapping->host;
2634 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
2635
2636 set_page_dirty(page);
2637 if (pos > inode->i_size) {
2638 i_size_write(inode, pos);
2639 mark_inode_dirty(inode);
2640 }
2641 return 0;
2642}
2643EXPORT_SYMBOL(nobh_commit_write);
2644
2645
2646
2647
2648int nobh_truncate_page(struct address_space *mapping, loff_t from)
2649{
2650 struct inode *inode = mapping->host;
2651 unsigned blocksize = 1 << inode->i_blkbits;
2652 pgoff_t index = from >> PAGE_CACHE_SHIFT;
2653 unsigned offset = from & (PAGE_CACHE_SIZE-1);
2654 unsigned to;
2655 struct page *page;
2656 struct address_space_operations *a_ops = mapping->a_ops;
2657 char *kaddr;
2658 int ret = 0;
2659
2660 if ((offset & (blocksize - 1)) == 0)
2661 goto out;
2662
2663 ret = -ENOMEM;
2664 page = grab_cache_page(mapping, index);
2665 if (!page)
2666 goto out;
2667
2668 to = (offset + blocksize) & ~(blocksize - 1);
2669 ret = a_ops->prepare_write(NULL, page, offset, to);
2670 if (ret == 0) {
2671 kaddr = kmap_atomic(page, KM_USER0);
2672 memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
2673 flush_dcache_page(page);
2674 kunmap_atomic(kaddr, KM_USER0);
2675 set_page_dirty(page);
2676 }
2677 unlock_page(page);
2678 page_cache_release(page);
2679out:
2680 return ret;
2681}
2682EXPORT_SYMBOL(nobh_truncate_page);
2683
2684int block_truncate_page(struct address_space *mapping,
2685 loff_t from, get_block_t *get_block)
2686{
2687 pgoff_t index = from >> PAGE_CACHE_SHIFT;
2688 unsigned offset = from & (PAGE_CACHE_SIZE-1);
2689 unsigned blocksize;
2690 pgoff_t iblock;
2691 unsigned length, pos;
2692 struct inode *inode = mapping->host;
2693 struct page *page;
2694 struct buffer_head *bh;
2695 void *kaddr;
2696 int err;
2697
2698 blocksize = 1 << inode->i_blkbits;
2699 length = offset & (blocksize - 1);
2700
2701
2702 if (!length)
2703 return 0;
2704
2705 length = blocksize - length;
2706 iblock = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
2707
2708 page = grab_cache_page(mapping, index);
2709 err = -ENOMEM;
2710 if (!page)
2711 goto out;
2712
2713 if (!page_has_buffers(page))
2714 create_empty_buffers(page, blocksize, 0);
2715
2716
2717 bh = page_buffers(page);
2718 pos = blocksize;
2719 while (offset >= pos) {
2720 bh = bh->b_this_page;
2721 iblock++;
2722 pos += blocksize;
2723 }
2724
2725 err = 0;
2726 if (!buffer_mapped(bh)) {
2727 err = get_block(inode, iblock, bh, 0);
2728 if (err)
2729 goto unlock;
2730
2731 if (!buffer_mapped(bh))
2732 goto unlock;
2733 }
2734
2735
2736 if (PageUptodate(page))
2737 set_buffer_uptodate(bh);
2738
2739 if (!buffer_uptodate(bh) && !buffer_delay(bh)) {
2740 err = -EIO;
2741 ll_rw_block(READ, 1, &bh);
2742 wait_on_buffer(bh);
2743
2744 if (!buffer_uptodate(bh))
2745 goto unlock;
2746 }
2747
2748 kaddr = kmap_atomic(page, KM_USER0);
2749 memset(kaddr + offset, 0, length);
2750 flush_dcache_page(page);
2751 kunmap_atomic(kaddr, KM_USER0);
2752
2753 mark_buffer_dirty(bh);
2754 err = 0;
2755
2756unlock:
2757 unlock_page(page);
2758 page_cache_release(page);
2759out:
2760 return err;
2761}
2762
2763
2764
2765
2766int block_write_full_page(struct page *page, get_block_t *get_block,
2767 struct writeback_control *wbc)
2768{
2769 struct inode * const inode = page->mapping->host;
2770 loff_t i_size = i_size_read(inode);
2771 const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
2772 unsigned offset;
2773 void *kaddr;
2774
2775
2776 if (page->index < end_index)
2777 return __block_write_full_page(inode, page, get_block, wbc);
2778
2779
2780 offset = i_size & (PAGE_CACHE_SIZE-1);
2781 if (page->index >= end_index+1 || !offset) {
2782
2783
2784
2785
2786
2787 block_invalidatepage(page, 0);
2788 unlock_page(page);
2789 return 0;
2790 }
2791
2792
2793
2794
2795
2796
2797
2798
2799 kaddr = kmap_atomic(page, KM_USER0);
2800 memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
2801 flush_dcache_page(page);
2802 kunmap_atomic(kaddr, KM_USER0);
2803 return __block_write_full_page(inode, page, get_block, wbc);