1
2
3
4
5
6
7
8
9
10
11
12
13#include <linux/kernel.h>
14#include <linux/module.h>
15#include <linux/backing-dev.h>
16#include <linux/bio.h>
17#include <linux/blkdev.h>
18#include <linux/highmem.h>
19#include <linux/mm.h>
20#include <linux/kernel_stat.h>
21#include <linux/string.h>
22#include <linux/init.h>
23#include <linux/bootmem.h>
24#include <linux/completion.h>
25#include <linux/slab.h>
26#include <linux/swap.h>
27#include <linux/writeback.h>
28#include <linux/interrupt.h>
29#include <linux/cpu.h>
30#include <linux/blktrace_api.h>
31
32
33
34
35#include <scsi/scsi_cmnd.h>
36
/*
 * Forward declarations of file-local helpers.  Some of them are referenced
 * further down (blk_unplug_work/blk_unplug_timeout by
 * blk_queue_make_request(), init_request_from_bio by start_ordered())
 * ahead of their definitions.
 */
static void blk_unplug_work(void *data);
static void blk_unplug_timeout(unsigned long data);
static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io);
static void init_request_from_bio(struct request *req, struct bio *bio);
static int __make_request(request_queue_t *q, struct bio *bio);
42
43
44
45
/*
 * Slab cache handles.  NOTE(review): going by the names these back request
 * structures, request queues and io contexts respectively -- the code that
 * creates and uses them is outside this part of the file; confirm there.
 */
static kmem_cache_t *request_cachep;

static kmem_cache_t *requestq_cachep;

static kmem_cache_t *iocontext_cachep;

/*
 * Two wait queue heads.  clear_queue_congested() selects one with its rw
 * argument and wakes any waiters found on it.
 */
static wait_queue_head_t congestion_wqh[2] = {
		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
	};

/* Workqueue handle.  NOTE(review): set up and used outside this view. */
static struct workqueue_struct *kblockd_workqueue;

/*
 * Exported page frame number limits.  blk_max_low_pfn is compared against
 * the requested bounce pfn in blk_queue_bounce_limit() on builds where
 * BITS_PER_LONG is not 64.
 */
unsigned long blk_max_low_pfn, blk_max_pfn;

EXPORT_SYMBOL(blk_max_low_pfn);
EXPORT_SYMBOL(blk_max_pfn);

/* Per-CPU list head.  NOTE(review): its users are outside this view. */
static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
74
75
/* HZ/50 jiffies, i.e. one fiftieth of a second. */
#define BLK_BATCH_TIME	(HZ/50UL)

/* Default stored into q->nr_batching by blk_queue_make_request(). */
#define BLK_BATCH_REQ	32
80
81
82
83
84
85
86static inline int queue_congestion_on_threshold(struct request_queue *q)
87{
88 return q->nr_congestion_on;
89}
90
91
92
93
94static inline int queue_congestion_off_threshold(struct request_queue *q)
95{
96 return q->nr_congestion_off;
97}
98
99static void blk_queue_congestion_threshold(struct request_queue *q)
100{
101 int nr;
102
103 nr = q->nr_requests - (q->nr_requests / 8) + 1;
104 if (nr > q->nr_requests)
105 nr = q->nr_requests;
106 q->nr_congestion_on = nr;
107
108 nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1;
109 if (nr < 1)
110 nr = 1;
111 q->nr_congestion_off = nr;
112}
113
114
115
116
117
118
119static void clear_queue_congested(request_queue_t *q, int rw)
120{
121 enum bdi_state bit;
122 wait_queue_head_t *wqh = &congestion_wqh[rw];
123
124 bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested;
125 clear_bit(bit, &q->backing_dev_info.state);
126 smp_mb__after_clear_bit();
127 if (waitqueue_active(wqh))
128 wake_up(wqh);
129}
130
131
132
133
134
135static void set_queue_congested(request_queue_t *q, int rw)
136{
137 enum bdi_state bit;
138
139 bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested;
140 set_bit(bit, &q->backing_dev_info.state);
141}
142
143
144
145
146
147
148
149
150
151
152struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
153{
154 struct backing_dev_info *ret = NULL;
155 request_queue_t *q = bdev_get_queue(bdev);
156
157 if (q)
158 ret = &q->backing_dev_info;
159 return ret;
160}
161
162EXPORT_SYMBOL(blk_get_backing_dev_info);
163
164void blk_queue_activity_fn(request_queue_t *q, activity_fn *fn, void *data)
165{
166 q->activity_fn = fn;
167 q->activity_data = data;
168}
169
170EXPORT_SYMBOL(blk_queue_activity_fn);
171
172
173
174
175
176
177
178
179
180
181
182
/**
 * blk_queue_prep_rq - set the prepare-request callback of a queue
 * @q:		queue to update
 * @pfn:	callback stored in q->prep_rq_fn
 */
void blk_queue_prep_rq(request_queue_t *q, prep_rq_fn *pfn)
{
	q->prep_rq_fn = pfn;
}

EXPORT_SYMBOL(blk_queue_prep_rq);
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
/**
 * blk_queue_merge_bvec - set the merge_bvec callback of a queue
 * @q:		queue to update
 * @mbfn:	callback stored in q->merge_bvec_fn
 */
void blk_queue_merge_bvec(request_queue_t *q, merge_bvec_fn *mbfn)
{
	q->merge_bvec_fn = mbfn;
}

EXPORT_SYMBOL(blk_queue_merge_bvec);
212
/**
 * blk_queue_softirq_done - set the softirq completion callback of a queue
 * @q:		queue to update
 * @fn:		callback stored in q->softirq_done_fn
 */
void blk_queue_softirq_done(request_queue_t *q, softirq_done_fn *fn)
{
	q->softirq_done_fn = fn;
}

EXPORT_SYMBOL(blk_queue_softirq_done);
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
/**
 * blk_queue_make_request - install a make_request function and defaults
 * @q:		queue to set up
 * @mfn:	function stored in q->make_request_fn
 *
 * Besides recording @mfn this resets the queue's limits and tunables to
 * the defaults listed below.
 */
void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
{
	/*
	 * set defaults
	 */
	q->nr_requests = BLKDEV_MAX_RQ;
	blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
	blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
	q->make_request_fn = mfn;
	q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
	q->backing_dev_info.state = 0;
	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
	blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
	blk_queue_hardsect_size(q, 512);
	blk_queue_dma_alignment(q, 511);
	/* thresholds are derived from q->nr_requests, which was set above */
	blk_queue_congestion_threshold(q);
	q->nr_batching = BLK_BATCH_REQ;

	q->unplug_thresh = 4;
	/* 3/1000 of a second in jiffies, but never less than one jiffy */
	q->unplug_delay = (3 * HZ) / 1000;
	if (q->unplug_delay == 0)
		q->unplug_delay = 1;

	INIT_WORK(&q->unplug_work, blk_unplug_work, q);

	/* the unplug timer callback receives the queue as its argument */
	q->unplug_timer.function = blk_unplug_timeout;
	q->unplug_timer.data = (unsigned long)q;

	/*
	 * default bounce limit
	 */
	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);

	/* no activity callback by default */
	blk_queue_activity_fn(q, NULL, NULL);
}

EXPORT_SYMBOL(blk_queue_make_request);
279
280static inline void rq_init(request_queue_t *q, struct request *rq)
281{
282 INIT_LIST_HEAD(&rq->queuelist);
283 INIT_LIST_HEAD(&rq->donelist);
284
285 rq->errors = 0;
286 rq->rq_status = RQ_ACTIVE;
287 rq->bio = rq->biotail = NULL;
288 rq->ioprio = 0;
289 rq->buffer = NULL;
290 rq->ref_count = 1;
291 rq->q = q;
292 rq->waiting = NULL;
293 rq->special = NULL;
294 rq->data_len = 0;
295 rq->data = NULL;
296 rq->nr_phys_segments = 0;
297 rq->sense = NULL;
298 rq->end_io = NULL;
299 rq->end_io_data = NULL;
300 rq->completion_data = NULL;
301}
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316int blk_queue_ordered(request_queue_t *q, unsigned ordered,
317 prepare_flush_fn *prepare_flush_fn)
318{
319 if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) &&
320 prepare_flush_fn == NULL) {
321 printk(KERN_ERR "blk_queue_ordered: prepare_flush_fn required\n");
322 return -EINVAL;
323 }
324
325 if (ordered != QUEUE_ORDERED_NONE &&
326 ordered != QUEUE_ORDERED_DRAIN &&
327 ordered != QUEUE_ORDERED_DRAIN_FLUSH &&
328 ordered != QUEUE_ORDERED_DRAIN_FUA &&
329 ordered != QUEUE_ORDERED_TAG &&
330 ordered != QUEUE_ORDERED_TAG_FLUSH &&
331 ordered != QUEUE_ORDERED_TAG_FUA) {
332 printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered);
333 return -EINVAL;
334 }
335
336 q->ordered = ordered;
337 q->next_ordered = ordered;
338 q->prepare_flush_fn = prepare_flush_fn;
339
340 return 0;
341}
342
343EXPORT_SYMBOL(blk_queue_ordered);
344
345
346
347
348
349
350
351
352
353
354
/**
 * blk_queue_issue_flush_fn - set the issue-flush callback of a queue
 * @q:		queue to update
 * @iff:	callback stored in q->issue_flush_fn
 */
void blk_queue_issue_flush_fn(request_queue_t *q, issue_flush_fn *iff)
{
	q->issue_flush_fn = iff;
}

EXPORT_SYMBOL(blk_queue_issue_flush_fn);
361
362
363
364
365inline unsigned blk_ordered_cur_seq(request_queue_t *q)
366{
367 if (!q->ordseq)
368 return 0;
369 return 1 << ffz(q->ordseq);
370}
371
372unsigned blk_ordered_req_seq(struct request *rq)
373{
374 request_queue_t *q = rq->q;
375
376 BUG_ON(q->ordseq == 0);
377
378 if (rq == &q->pre_flush_rq)
379 return QUEUE_ORDSEQ_PREFLUSH;
380 if (rq == &q->bar_rq)
381 return QUEUE_ORDSEQ_BAR;
382 if (rq == &q->post_flush_rq)
383 return QUEUE_ORDSEQ_POSTFLUSH;
384
385 if ((rq->flags & REQ_ORDERED_COLOR) ==
386 (q->orig_bar_rq->flags & REQ_ORDERED_COLOR))
387 return QUEUE_ORDSEQ_DRAIN;
388 else
389 return QUEUE_ORDSEQ_DONE;
390}
391
392void blk_ordered_complete_seq(request_queue_t *q, unsigned seq, int error)
393{
394 struct request *rq;
395 int uptodate;
396
397 if (error && !q->orderr)
398 q->orderr = error;
399
400 BUG_ON(q->ordseq & seq);
401 q->ordseq |= seq;
402
403 if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
404 return;
405
406
407
408
409 rq = q->orig_bar_rq;
410 uptodate = q->orderr ? q->orderr : 1;
411
412 q->ordseq = 0;
413
414 end_that_request_first(rq, uptodate, rq->hard_nr_sectors);
415 end_that_request_last(rq, uptodate);
416}
417
418static void pre_flush_end_io(struct request *rq, int error)
419{
420 elv_completed_request(rq->q, rq);
421 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error);
422}
423
424static void bar_end_io(struct request *rq, int error)
425{
426 elv_completed_request(rq->q, rq);
427 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error);
428}
429
430static void post_flush_end_io(struct request *rq, int error)
431{
432 elv_completed_request(rq->q, rq);
433 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
434}
435
436static void queue_flush(request_queue_t *q, unsigned which)
437{
438 struct request *rq;
439 rq_end_io_fn *end_io;
440
441 if (which == QUEUE_ORDERED_PREFLUSH) {
442 rq = &q->pre_flush_rq;
443 end_io = pre_flush_end_io;
444 } else {
445 rq = &q->post_flush_rq;
446 end_io = post_flush_end_io;
447 }
448
449 rq_init(q, rq);
450 rq->flags = REQ_HARDBARRIER;
451 rq->elevator_private = NULL;
452 rq->rq_disk = q->bar_rq.rq_disk;
453 rq->rl = NULL;
454 rq->end_io = end_io;
455 q->prepare_flush_fn(q, rq);
456
457 elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
458}
459
/*
 * Begin an ordered (barrier) sequence for the barrier request @rq.
 *
 * Resets the per-queue sequence bookkeeping, dequeues @rq and remembers it
 * in q->orig_bar_rq, then builds the proxy request q->bar_rq from the
 * original's bio and queues it together with the optional pre/post flush
 * requests.
 *
 * Returns the request to hand out next (the pre-flush request when one was
 * queued, otherwise the proxy barrier), or NULL when the queue is not
 * ordered by tag and q->in_flight is still non-zero.
 */
static inline struct request *start_ordered(request_queue_t *q,
					    struct request *rq)
{
	q->bi_size = 0;
	q->orderr = 0;
	q->ordered = q->next_ordered;
	q->ordseq |= QUEUE_ORDSEQ_STARTED;

	/*
	 * Prepare the proxy barrier request; the original stays parked in
	 * q->orig_bar_rq until the whole sequence has completed.
	 */
	blkdev_dequeue_request(rq);
	q->orig_bar_rq = rq;
	rq = &q->bar_rq;
	rq_init(q, rq);
	rq->flags = bio_data_dir(q->orig_bar_rq->bio);
	rq->flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0;
	rq->elevator_private = NULL;
	rq->rl = NULL;
	init_request_from_bio(rq, q->orig_bar_rq->bio);
	rq->end_io = bar_end_io;

	/*
	 * Queue the sequence.  Every element is inserted with
	 * ELEVATOR_INSERT_FRONT, so (assuming front insertion stacks at the
	 * head of the queue) they are queued in reverse order: post-flush,
	 * barrier, pre-flush.  Steps that are not needed are marked as
	 * already completed in q->ordseq.
	 */
	if (q->ordered & QUEUE_ORDERED_POSTFLUSH)
		queue_flush(q, QUEUE_ORDERED_POSTFLUSH);
	else
		q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;

	elv_insert(q, rq, ELEVATOR_INSERT_FRONT);

	if (q->ordered & QUEUE_ORDERED_PREFLUSH) {
		queue_flush(q, QUEUE_ORDERED_PREFLUSH);
		rq = &q->pre_flush_rq;
	} else
		q->ordseq |= QUEUE_ORDSEQ_PREFLUSH;

	/*
	 * The drain step is complete right away when ordering by tag or
	 * when nothing is in flight; otherwise nothing is handed out yet.
	 */
	if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0)
		q->ordseq |= QUEUE_ORDSEQ_DRAIN;
	else
		rq = NULL;

	return rq;
}
508
/*
 * Apply the ordered-sequence rules to the request in *@rqp.
 *
 * *@rqp may be replaced (by the first request of a newly started sequence)
 * or set to NULL when the request has to wait.  Returns 1 in every case
 * except one: a barrier arriving while the queue's next_ordered mode is
 * QUEUE_ORDERED_NONE is completed with -EOPNOTSUPP, *@rqp is set to NULL
 * and 0 is returned.
 */
int blk_do_ordered(request_queue_t *q, struct request **rqp)
{
	struct request *rq = *rqp;
	int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);

	if (!q->ordseq) {
		/* no sequence running: only barriers need special care */
		if (!is_barrier)
			return 1;

		if (q->next_ordered != QUEUE_ORDERED_NONE) {
			*rqp = start_ordered(q, rq);
			return 1;
		} else {
			/*
			 * Barrier on a queue whose ordered mode is NONE:
			 * fail the request with -EOPNOTSUPP.
			 */
			blkdev_dequeue_request(rq);
			end_that_request_first(rq, -EOPNOTSUPP,
					       rq->hard_nr_sectors);
			end_that_request_last(rq, -EOPNOTSUPP);
			*rqp = NULL;
			return 0;
		}
	}

	/*
	 * Ordered sequence in progress.
	 *
	 * Non-fs requests other than the queue's own flush requests pass
	 * through untouched.
	 */
	if (!blk_fs_request(rq) &&
	    rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
		return 1;

	if (q->ordered & QUEUE_ORDERED_TAG) {
		/* ordered by tag: only hold back a further barrier */
		if (is_barrier && rq != &q->bar_rq)
			*rqp = NULL;
	} else {
		/* ordered by draining: hold back requests of later steps */
		WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q));
		if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q))
			*rqp = NULL;
	}

	return 1;
}
557
/*
 * Temporary bi_end_io installed by ordered_bio_endio().
 *
 * Accumulates the completed byte count in q->bi_size and returns 1 while
 * the bio still has bytes outstanding.  When bio->bi_size reaches zero the
 * bio is put back into a whole state (index, vector offsets/lengths, size
 * and start sector) and 0 is returned.  @error is not used here.
 */
static int flush_dry_bio_endio(struct bio *bio, unsigned int bytes, int error)
{
	request_queue_t *q = bio->bi_private;
	struct bio_vec *bvec;
	int i;

	/*
	 * Remember how many bytes have been completed so far; the total is
	 * needed below to restore bi_size and bi_sector.
	 */
	q->bi_size += bytes;

	if (bio->bi_size)
		return 1;

	/* Rewind the bio_vecs: fold each offset back into its length */
	bio->bi_idx = 0;
	bio_for_each_segment(bvec, bio, i) {
		bvec->bv_len += bvec->bv_offset;
		bvec->bv_offset = 0;
	}

	/* Reset the bio itself and clear the per-queue byte counter */
	set_bit(BIO_UPTODATE, &bio->bi_flags);
	bio->bi_size = q->bi_size;
	bio->bi_sector -= (q->bi_size >> 9);
	q->bi_size = 0;

	return 0;
}
589
590static inline int ordered_bio_endio(struct request *rq, struct bio *bio,
591 unsigned int nbytes, int error)
592{
593 request_queue_t *q = rq->q;
594 bio_end_io_t *endio;
595 void *private;
596
597 if (&q->bar_rq != rq)
598 return 0;
599
600
601
602
603 if (error && !q->orderr)
604 q->orderr = error;
605
606 endio = bio->bi_end_io;
607 private = bio->bi_private;
608 bio->bi_end_io = flush_dry_bio_endio;
609 bio->bi_private = q;
610
611 bio_endio(bio, nbytes, error);
612
613 bio->bi_end_io = endio;
614 bio->bi_private = private;
615
616 return 1;
617}
618
619
620
621
622
623
624
625
626
627
628
629
/**
 * blk_queue_bounce_limit - set the bounce limit of a queue
 * @q:		queue to update
 * @dma_addr:	address limit; it is converted to a page frame number with
 *		PAGE_SHIFT
 *
 * Sets q->bounce_pfn and q->bounce_gfp.  When the limit is low enough to
 * set the local "dma" flag, the emergency ISA pool is initialised and
 * GFP_DMA is added to the allocation mask.
 */
void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr)
{
	unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT;
	int dma = 0;

	q->bounce_gfp = GFP_NOIO;
#if BITS_PER_LONG == 64
	/*
	 * 64-bit builds: flag "dma" only for limits below the smaller of
	 * 4GB and BLK_BOUNCE_HIGH; otherwise the limit used is max_low_pfn.
	 */
	if (bounce_pfn < (min_t(u64,0xffffffff,BLK_BOUNCE_HIGH) >> PAGE_SHIFT))
		dma = 1;
	q->bounce_pfn = max_low_pfn;
#else
	/* other builds: flag "dma" for limits below blk_max_low_pfn */
	if (bounce_pfn < blk_max_low_pfn)
		dma = 1;
	q->bounce_pfn = bounce_pfn;
#endif
	if (dma) {
		init_emergency_isa_pool();
		q->bounce_gfp = GFP_NOIO | GFP_DMA;
		q->bounce_pfn = bounce_pfn;
	}
}

EXPORT_SYMBOL(blk_queue_bounce_limit);
656
657
658
659
660
661
662
663
664
665
666void blk_queue_max_sectors(request_queue_t *q, unsigned int max_sectors)
667{
668 if ((max_sectors << 9) < PAGE_CACHE_SIZE) {
669 max_sectors = 1 << (PAGE_CACHE_SHIFT - 9);
670 printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors);
671 }
672
673 if (BLK_DEF_MAX_SECTORS > max_sectors)
674 q->max_hw_sectors = q->max_sectors = max_sectors;
675 else {
676 q->max_sectors = BLK_DEF_MAX_SECTORS;
677 q->max_hw_sectors = max_sectors;
678 }
679}
680
681EXPORT_SYMBOL(blk_queue_max_sectors);
682
683
684
685
686
687
688
689
690
691
692
693void blk_queue_max_phys_segments(request_queue_t *q, unsigned short max_segments)
694{
695 if (!max_segments) {
696 max_segments = 1;
697 printk("%s: set to minimum %d\n", __FUNCTION__, max_segments);
698 }
699
700 q->max_phys_segments = max_segments;
701}
702
703EXPORT_SYMBOL(blk_queue_max_phys_segments);
704
705
706
707
708
709
710
711
712
713
714
715
716void blk_queue_max_hw_segments(request_queue_t *q, unsigned short max_segments)
717{
718 if (!max_segments) {
719 max_segments = 1;
720 printk("%s: set to minimum %d\n", __FUNCTION__, max_segments);
721 }
722
723 q->max_hw_segments = max_segments;
724}
725
726EXPORT_SYMBOL(blk_queue_max_hw_segments);
727
728
729
730
731
732
733
734
735
736
737void blk_queue_max_segment_size(request_queue_t *q, unsigned int max_size)
738{
739 if (max_size < PAGE_CACHE_SIZE) {
740 max_size = PAGE_CACHE_SIZE;
741 printk("%s: set to minimum %d\n", __FUNCTION__, max_size);
742 }
743
744 q->max_segment_size = max_size;
745}
746
747EXPORT_SYMBOL(blk_queue_max_segment_size);
748
749
750
751
752
753
754
755
756
757
758
759
/**
 * blk_queue_hardsect_size - set the hardware sector size of a queue
 * @q:		queue to update
 * @size:	value stored in q->hardsect_size
 */
void blk_queue_hardsect_size(request_queue_t *q, unsigned short size)
{
	q->hardsect_size = size;
}

EXPORT_SYMBOL(blk_queue_hardsect_size);
766
767
768
769
/*
 * Returns the smaller of two values, treating 0 as "no limit": if either
 * argument is 0 the other one is returned.
 *
 * Every argument and the whole expansion are parenthesized so the macro
 * behaves like a single expression whatever operators surround it or
 * appear in its arguments.  Arguments may still be evaluated more than
 * once, so do not pass expressions with side effects.
 */
#define min_not_zero(l, r) \
	(((l) == 0) ? (r) : (((r) == 0) ? (l) : min((l), (r))))
771
772
773
774
775
776
777void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b)
778{
779
780 t->max_sectors = min_not_zero(t->max_sectors,b->max_sectors);
781 t->max_hw_sectors = min_not_zero(t->max_hw_sectors,b->max_hw_sectors);
782
783 t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments);
784 t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments);
785 t->max_segment_size = min(t->max_segment_size,b->max_segment_size);
786 t->hardsect_size = max(t->hardsect_size,b->hardsect_size);
787 if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags))
788 clear_bit(QUEUE_FLAG_CLUSTER, &t->queue_flags);
789}
790
791EXPORT_SYMBOL(blk_queue_stack_limits);
792
793
794
795
796
797
798void blk_queue_segment_boundary(request_queue_t *q, unsigned long mask)
799{
800 if (mask < PAGE_CACHE_SIZE - 1) {
801 mask = PAGE_CACHE_SIZE - 1;
802 printk("%s: set to minimum %lx\n", __FUNCTION__, mask);
803 }
804
805 q->seg_boundary_mask = mask;
806}
807
808EXPORT_SYMBOL(blk_queue_segment_boundary);
809
810
811
812
813
814
815
816
817
818
819
/**
 * blk_queue_dma_alignment - set the DMA alignment mask of a queue
 * @q:		queue to update
 * @mask:	value stored in q->dma_alignment
 */
void blk_queue_dma_alignment(request_queue_t *q, int mask)
{
	q->dma_alignment = mask;
}

EXPORT_SYMBOL(blk_queue_dma_alignment);
826
827
828
829
830
831
832
833
834
835
836
837
838struct request *blk_queue_find_tag(request_queue_t *q, int tag)
839{
840 struct blk_queue_tag *bqt = q->queue_tags;
841
842 if (unlikely(bqt == NULL || tag >= bqt->real_max_depth))
843 return NULL;
844
845 return bqt->tag_index[tag];
846}
847
848EXPORT_SYMBOL(blk_queue_find_tag);
849
850
851
852
853
854
855
856
857static int __blk_free_tags(struct blk_queue_tag *bqt)
858{
859 int retval;
860
861 retval = atomic_dec_and_test(&bqt->refcnt);
862 if (retval) {
863 BUG_ON(bqt->busy);
864 BUG_ON(!list_empty(&bqt->busy_list));
865
866 kfree(bqt->tag_index);
867 bqt->tag_index = NULL;
868
869 kfree(bqt->tag_map);
870 bqt->tag_map = NULL;
871
872 kfree(bqt);
873
874 }
875
876 return retval;
877}
878
879
880
881
882
883
884
885
886
887static void __blk_queue_free_tags(request_queue_t *q)
888{
889 struct blk_queue_tag *bqt = q->queue_tags;
890
891 if (!bqt)
892 return;
893
894 __blk_free_tags(bqt);
895
896 q->queue_tags = NULL;
897 q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED);
898}
899
900
901
902
903
904
905
906
907
908
/**
 * blk_free_tags - release a tag map
 * @bqt:	tag map to release
 *
 * Drops a reference on @bqt and triggers BUG() if that did not free the
 * map, i.e. if other references were still outstanding.
 */
void blk_free_tags(struct blk_queue_tag *bqt)
{
	int freed = __blk_free_tags(bqt);

	if (unlikely(!freed))
		BUG();
}
EXPORT_SYMBOL(blk_free_tags);
915
916
917
918
919
920
921
922
923
/**
 * blk_queue_free_tags - turn off tagged queueing on a queue
 * @q:	queue to update
 *
 * Only clears QUEUE_FLAG_QUEUED; the tag map itself is neither detached
 * nor freed by this function.
 */
void blk_queue_free_tags(request_queue_t *q)
{
	clear_bit(QUEUE_FLAG_QUEUED, &q->queue_flags);
}

EXPORT_SYMBOL(blk_queue_free_tags);
930
931static int
932init_tag_map(request_queue_t *q, struct blk_queue_tag *tags, int depth)
933{
934 struct request **tag_index;
935 unsigned long *tag_map;
936 int nr_ulongs;
937
938 if (q && depth > q->nr_requests * 2) {
939 depth = q->nr_requests * 2;
940 printk(KERN_ERR "%s: adjusted depth to %d\n",
941 __FUNCTION__, depth);
942 }
943
944 tag_index = kzalloc(depth * sizeof(struct request *), GFP_ATOMIC);
945 if (!tag_index)
946 goto fail;
947
948 nr_ulongs = ALIGN(depth, BITS_PER_LONG) / BITS_PER_LONG;
949 tag_map = kzalloc(nr_ulongs * sizeof(unsigned long), GFP_ATOMIC);
950 if (!tag_map)
951 goto fail;
952
953 tags->real_max_depth = depth;
954 tags->max_depth = depth;
955 tags->tag_index = tag_index;
956 tags->tag_map = tag_map;
957
958 return 0;
959fail:
960 kfree(tag_index);
961 return -ENOMEM;
962}
963
964static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q,
965 int depth)
966{
967 struct blk_queue_tag *tags;
968
969 tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC);
970 if (!tags)
971 goto fail;
972
973 if (init_tag_map(q, tags, depth))
974 goto fail;
975
976 INIT_LIST_HEAD(&tags->busy_list);
977 tags->busy = 0;
978 atomic_set(&tags->refcnt, 1);
979 return tags;
980fail:
981 kfree(tags);
982 return NULL;
983}
984
985
986
987
988
989
/**
 * blk_init_tags - allocate a tag map that is not tied to a queue
 * @depth:	number of tags
 *
 * Thin wrapper around __blk_queue_init_tags() with a NULL queue; returns
 * the new map or NULL on allocation failure.
 */
struct blk_queue_tag *blk_init_tags(int depth)
{
	return __blk_queue_init_tags(NULL, depth);
}
EXPORT_SYMBOL(blk_init_tags);
995
996
997
998
999
1000
1001
1002int blk_queue_init_tags(request_queue_t *q, int depth,
1003 struct blk_queue_tag *tags)
1004{
1005 int rc;
1006
1007 BUG_ON(tags && q->queue_tags && tags != q->queue_tags);
1008
1009 if (!tags && !q->queue_tags) {
1010 tags = __blk_queue_init_tags(q, depth);
1011
1012 if (!tags)
1013 goto fail;
1014 } else if (q->queue_tags) {
1015 if ((rc = blk_queue_resize_tags(q, depth)))
1016 return rc;
1017 set_bit(QUEUE_FLAG_QUEUED, &q->queue_flags);
1018 return 0;
1019 } else
1020 atomic_inc(&tags->refcnt);
1021
1022
1023
1024
1025 q->queue_tags = tags;
1026 q->queue_flags |= (1 << QUEUE_FLAG_QUEUED);
1027 return 0;
1028fail:
1029 kfree(tags);
1030 return -ENOMEM;
1031}
1032
1033EXPORT_SYMBOL(blk_queue_init_tags);
1034
1035
1036
1037
1038
1039
1040
1041
1042
/**
 * blk_queue_resize_tags - change the tag depth of a queue
 * @q:		queue whose tag map is resized
 * @new_depth:	new number of tags
 *
 * Returns 0 on success, -ENXIO when the queue has no tag map, -EBUSY when
 * a larger map is needed but the current one is shared, and -ENOMEM when
 * allocating the larger map fails.
 */
int blk_queue_resize_tags(request_queue_t *q, int new_depth)
{
	struct blk_queue_tag *bqt = q->queue_tags;
	struct request **tag_index;
	unsigned long *tag_map;
	int max_depth, nr_ulongs;

	if (!bqt)
		return -ENXIO;

	/*
	 * The allocated map is already large enough: only adjust
	 * max_depth.  The arrays are not shrunk, so entries between
	 * new_depth and real_max_depth remain addressable.
	 */
	if (new_depth <= bqt->real_max_depth) {
		bqt->max_depth = new_depth;
		return 0;
	}

	/*
	 * Growing replaces the arrays, which is refused while anybody else
	 * holds a reference on the map.
	 */
	if (atomic_read(&bqt->refcnt) != 1)
		return -EBUSY;

	/*
	 * Save the old arrays so their contents can be copied over.
	 */
	tag_index = bqt->tag_index;
	tag_map = bqt->tag_map;
	max_depth = bqt->real_max_depth;

	if (init_tag_map(q, bqt, new_depth))
		return -ENOMEM;

	/*
	 * NOTE(review): init_tag_map() may cap the depth at twice
	 * q->nr_requests; the copies below assume the new arrays hold at
	 * least max_depth entries -- confirm that cannot be violated.
	 */
	memcpy(bqt->tag_index, tag_index, max_depth * sizeof(struct request *));
	nr_ulongs = ALIGN(max_depth, BITS_PER_LONG) / BITS_PER_LONG;
	memcpy(bqt->tag_map, tag_map, nr_ulongs * sizeof(unsigned long));

	kfree(tag_index);
	kfree(tag_map);
	return 0;
}

EXPORT_SYMBOL(blk_queue_resize_tags);
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106void blk_queue_end_tag(request_queue_t *q, struct request *rq)
1107{
1108 struct blk_queue_tag *bqt = q->queue_tags;
1109 int tag = rq->tag;
1110
1111 BUG_ON(tag == -1);
1112
1113 if (unlikely(tag >= bqt->real_max_depth))
1114
1115
1116
1117
1118 return;
1119
1120 if (unlikely(!__test_and_clear_bit(tag, bqt->tag_map))) {
1121 printk(KERN_ERR "%s: attempt to clear non-busy tag (%d)\n",
1122 __FUNCTION__, tag);
1123 return;
1124 }
1125
1126 list_del_init(&rq->queuelist);
1127 rq->flags &= ~REQ_QUEUED;
1128 rq->tag = -1;
1129
1130 if (unlikely(bqt->tag_index[tag] == NULL))
1131 printk(KERN_ERR "%s: tag %d is missing\n",
1132 __FUNCTION__, tag);
1133
1134 bqt->tag_index[tag] = NULL;
1135 bqt->busy--;
1136}
1137
1138EXPORT_SYMBOL(blk_queue_end_tag);
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158int blk_queue_start_tag(request_queue_t *q, struct request *rq)
1159{
1160 struct blk_queue_tag *bqt = q->queue_tags;
1161 int tag;
1162
1163 if (unlikely((rq->flags & REQ_QUEUED))) {
1164 printk(KERN_ERR
1165 "%s: request %p for device [%s] already tagged %d",
1166 __FUNCTION__, rq,
1167 rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->tag);
1168 BUG();
1169 }
1170
1171
1172
1173
1174
1175 do {
1176 tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth);
1177 if (tag >= bqt->max_depth)
1178 return 1;
1179
1180 } while (test_and_set_bit(tag, bqt->tag_map));
1181
1182 rq->flags |= REQ_QUEUED;
1183 rq->tag = tag;
1184 bqt->tag_index[tag] = rq;
1185 blkdev_dequeue_request(rq);
1186 list_add(&rq->queuelist, &bqt->busy_list);
1187 bqt->busy++;
1188 return 0;
1189}
1190
1191EXPORT_SYMBOL(blk_queue_start_tag);
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205void blk_queue_invalidate_tags(request_queue_t *q)
1206{
1207 struct blk_queue_tag *bqt = q->queue_tags;
1208 struct list_head *tmp, *n;
1209 struct request *rq;
1210
1211 list_for_each_safe(tmp, n, &bqt->busy_list) {
1212 rq = list_entry_rq(tmp);
1213
1214 if (rq->tag == -1) {
1215 printk(KERN_ERR
1216 "%s: bad tag found on list\n", __FUNCTION__);
1217 list_del_init(&rq->queuelist);
1218 rq->flags &= ~REQ_QUEUED;
1219 } else
1220 blk_queue_end_tag(q, rq);
1221
1222 rq->flags &= ~REQ_STARTED;
1223 __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
1224 }
1225}
1226
1227EXPORT_SYMBOL(blk_queue_invalidate_tags);
1228
/*
 * Printable names for the request flag bits, indexed by bit number.
 * blk_dump_rq_flags() prints entry N when bit N of rq->flags is set, so
 * the order must follow the flag bit numbering.
 */
static const char * const rq_flags[] = {
	"REQ_RW",
	"REQ_FAILFAST",
	"REQ_SORTED",
	"REQ_SOFTBARRIER",
	"REQ_HARDBARRIER",
	"REQ_FUA",
	"REQ_CMD",
	"REQ_NOMERGE",
	"REQ_STARTED",
	"REQ_DONTPREP",
	"REQ_QUEUED",
	"REQ_ELVPRIV",
	"REQ_PC",
	"REQ_BLOCK_PC",
	"REQ_SENSE",
	"REQ_FAILED",
	"REQ_QUIET",
	"REQ_SPECIAL",
	"REQ_DRIVE_CMD",
	"REQ_DRIVE_TASK",
	"REQ_DRIVE_TASKFILE",
	"REQ_PREEMPT",
	"REQ_PM_SUSPEND",
	"REQ_PM_RESUME",
	"REQ_PM_SHUTDOWN",
	"REQ_ORDERED_COLOR",
};
1257
1258void blk_dump_rq_flags(struct request *rq, char *msg)
1259{
1260 int bit;
1261
1262 printk("%s: dev %s: flags = ", msg,
1263 rq->rq_disk ? rq->rq_disk->disk_name : "?");
1264 bit = 0;
1265 do {
1266 if (rq->flags & (1 << bit))
1267 printk("%s ", rq_flags[bit]);
1268 bit++;
1269 } while (bit < __REQ_NR_BITS);
1270
1271 printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector,
1272 rq->nr_sectors,
1273 rq->current_nr_sectors);
1274 printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len);
1275
1276 if (rq->flags & (REQ_BLOCK_PC | REQ_PC)) {
1277 printk("cdb: ");
1278 for (bit = 0; bit < sizeof(rq->cmd); bit++)
1279 printk("%02x ", rq->cmd[bit]);
1280 printk("\n");
1281 }
1282}
1283
1284EXPORT_SYMBOL(blk_dump_rq_flags);
1285
/*
 * Recompute the segment accounting of @bio for queue @q.
 *
 * Walks the bio's vectors and counts physical and hardware segments,
 * stores the counts in bi_phys_segments/bi_hw_segments, raises
 * bi_hw_front_size/bi_hw_back_size where the sizes seen here are larger
 * than the stored ones, and sets BIO_SEG_VALID.  Does nothing for a bio
 * without an io vector.
 */
void blk_recount_segments(request_queue_t *q, struct bio *bio)
{
	struct bio_vec *bv, *bvprv = NULL;
	int i, nr_phys_segs, nr_hw_segs, seg_size, hw_seg_size, cluster;
	/* highprv starts as 1 so the first vector always opens a segment */
	int high, highprv = 1;

	if (unlikely(!bio->bi_io_vec))
		return;

	cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER);
	hw_seg_size = seg_size = nr_phys_segs = nr_hw_segs = 0;
	bio_for_each_segment(bv, bio, i) {
		/*
		 * A page at or above the queue's bounce pfn, or a page that
		 * follows one, never joins another segment: it always
		 * starts a new hardware segment.
		 */
		high = page_to_pfn(bv->bv_page) >= q->bounce_pfn;
		if (high || highprv)
			goto new_hw_segment;
		if (cluster) {
			/*
			 * Clustering: extend the current segment as long as
			 * every merge condition holds.
			 */
			if (seg_size + bv->bv_len > q->max_segment_size)
				goto new_segment;
			if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv))
				goto new_segment;
			if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv))
				goto new_segment;
			if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len))
				goto new_hw_segment;

			seg_size += bv->bv_len;
			hw_seg_size += bv->bv_len;
			bvprv = bv;
			continue;
		}
new_segment:
		/* new physical segment; the hardware segment may continue */
		if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) &&
		    !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) {
			hw_seg_size += bv->bv_len;
		} else {
new_hw_segment:
			/* close the previous hardware segment, open a new one */
			if (hw_seg_size > bio->bi_hw_front_size)
				bio->bi_hw_front_size = hw_seg_size;
			hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len;
			nr_hw_segs++;
		}

		nr_phys_segs++;
		bvprv = bv;
		seg_size = bv->bv_len;
		highprv = high;
	}
	/* the last hardware segment determines the back size */
	if (hw_seg_size > bio->bi_hw_back_size)
		bio->bi_hw_back_size = hw_seg_size;
	/* with a single hardware segment it is the front segment too */
	if (nr_hw_segs == 1 && hw_seg_size > bio->bi_hw_front_size)
		bio->bi_hw_front_size = hw_seg_size;
	bio->bi_phys_segments = nr_phys_segs;
	bio->bi_hw_segments = nr_hw_segs;
	bio->bi_flags |= (1 << BIO_SEG_VALID);
}
1346
1347
1348static int blk_phys_contig_segment(request_queue_t *q, struct bio *bio,
1349 struct bio *nxt)
1350{
1351 if (!(q->queue_flags & (1 << QUEUE_FLAG_CLUSTER)))
1352 return 0;
1353
1354 if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)))
1355 return 0;
1356 if (bio->bi_size + nxt->bi_size > q->max_segment_size)
1357 return 0;
1358
1359
1360
1361
1362
1363 if (BIO_SEG_BOUNDARY(q, bio, nxt))
1364 return 1;
1365
1366 return 0;
1367}
1368
1369static int blk_hw_contig_segment(request_queue_t *q, struct bio *bio,
1370 struct bio *nxt)
1371{
1372 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
1373 blk_recount_segments(q, bio);
1374 if (unlikely(!bio_flagged(nxt, BIO_SEG_VALID)))
1375 blk_recount_segments(q, nxt);
1376 if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) ||
1377 BIOVEC_VIRT_OVERSIZE(bio->bi_hw_front_size + bio->bi_hw_back_size))
1378 return 0;
1379 if (bio->bi_size + nxt->bi_size > q->max_segment_size)
1380 return 0;
1381
1382 return 1;
1383}
1384
1385
1386
1387
1388
/**
 * blk_rq_map_sg - fill a scatterlist from the bios of a request
 * @q:	queue the request belongs to (supplies the clustering limits)
 * @rq:	request to map
 * @sg:	scatterlist array to fill; the caller must supply enough entries
 *
 * Returns the number of scatterlist entries written.
 */
int blk_rq_map_sg(request_queue_t *q, struct request *rq, struct scatterlist *sg)
{
	struct bio_vec *bvec, *bvprv;
	struct bio *bio;
	int nsegs, i, cluster;

	nsegs = 0;
	cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER);

	/*
	 * for each bio in rq
	 */
	bvprv = NULL;
	rq_for_each_bio(bio, rq) {
		/*
		 * for each segment in bio
		 */
		bio_for_each_segment(bvec, bio, i) {
			int nbytes = bvec->bv_len;

			if (bvprv && cluster) {
				/*
				 * Clustering: grow the previous entry unless
				 * a size, contiguity or boundary limit says
				 * otherwise.
				 */
				if (sg[nsegs - 1].length + nbytes > q->max_segment_size)
					goto new_segment;

				if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec))
					goto new_segment;
				if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec))
					goto new_segment;

				sg[nsegs - 1].length += nbytes;
			} else {
new_segment:
				/* start a fresh, zeroed scatterlist entry */
				memset(&sg[nsegs],0,sizeof(struct scatterlist));
				sg[nsegs].page = bvec->bv_page;
				sg[nsegs].length = nbytes;
				sg[nsegs].offset = bvec->bv_offset;

				nsegs++;
			}
			bvprv = bvec;
		} /* segments in bio */
	} /* bios in rq */

	return nsegs;
}

EXPORT_SYMBOL(blk_rq_map_sg);
1436
1437
1438
1439
1440
1441
1442static inline int ll_new_mergeable(request_queue_t *q,
1443 struct request *req,
1444 struct bio *bio)
1445{
1446 int nr_phys_segs = bio_phys_segments(q, bio);
1447
1448 if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
1449 req->flags |= REQ_NOMERGE;
1450 if (req == q->last_merge)
1451 q->last_merge = NULL;
1452 return 0;
1453 }
1454
1455
1456
1457
1458
1459 req->nr_phys_segments += nr_phys_segs;
1460 return 1;
1461}
1462
1463static inline int ll_new_hw_segment(request_queue_t *q,
1464 struct request *req,
1465 struct bio *bio)
1466{
1467 int nr_hw_segs = bio_hw_segments(q, bio);
1468 int nr_phys_segs = bio_phys_segments(q, bio);
1469
1470 if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments
1471 || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
1472 req->flags |= REQ_NOMERGE;
1473 if (req == q->last_merge)
1474 q->last_merge = NULL;
1475 return 0;
1476 }
1477
1478
1479
1480
1481
1482 req->nr_hw_segments += nr_hw_segs;
1483 req->nr_phys_segments += nr_phys_segs;
1484 return 1;
1485}
1486
1487static int ll_back_merge_fn(request_queue_t *q, struct request *req,
1488 struct bio *bio)
1489{
1490 unsigned short max_sectors;
1491 int len;
1492
1493 if (unlikely(blk_pc_request(req)))
1494 max_sectors = q->max_hw_sectors;
1495 else
1496 max_sectors = q->max_sectors;
1497
1498 if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
1499 req->flags |= REQ_NOMERGE;
1500 if (req == q->last_merge)
1501 q->last_merge = NULL;
1502 return 0;
1503 }
1504 if (unlikely(!bio_flagged(req->biotail, BIO_SEG_VALID)))
1505 blk_recount_segments(q, req->biotail);
1506 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
1507 blk_recount_segments(q, bio);
1508 len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size;
1509 if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio)) &&
1510 !BIOVEC_VIRT_OVERSIZE(len)) {
1511 int mergeable = ll_new_mergeable(q, req, bio);
1512
1513 if (mergeable) {
1514 if (req->nr_hw_segments == 1)
1515 req->bio->bi_hw_front_size = len;
1516 if (bio->bi_hw_segments == 1)
1517 bio->bi_hw_back_size = len;
1518 }
1519 return mergeable;
1520 }
1521
1522 return ll_new_hw_segment(q, req, bio);
1523}
1524
1525static int ll_front_merge_fn(request_queue_t *q, struct request *req,
1526 struct bio *bio)
1527{
1528 unsigned short max_sectors;
1529 int len;
1530
1531 if (unlikely(blk_pc_request(req)))
1532 max_sectors = q->max_hw_sectors;
1533 else
1534 max_sectors = q->max_sectors;
1535
1536
1537 if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
1538 req->flags |= REQ_NOMERGE;
1539 if (req == q->last_merge)
1540 q->last_merge = NULL;
1541 return 0;
1542 }
1543 len = bio->bi_hw_back_size + req->bio->bi_hw_front_size;
1544 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
1545 blk_recount_segments(q, bio);
1546 if (unlikely(!bio_flagged(req->bio, BIO_SEG_VALID)))
1547 blk_recount_segments(q, req->bio);
1548 if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) &&
1549 !BIOVEC_VIRT_OVERSIZE(len)) {
1550 int mergeable = ll_new_mergeable(q, req, bio);
1551
1552 if (mergeable) {
1553 if (bio->bi_hw_segments == 1)
1554 bio->bi_hw_front_size = len;
1555 if (req->nr_hw_segments == 1)
1556 req->biotail->bi_hw_back_size = len;
1557 }
1558 return mergeable;
1559 }
1560
1561 return ll_new_hw_segment(q, req, bio);
1562}
1563
1564static int ll_merge_requests_fn(request_queue_t *q, struct request *req,
1565 struct request *next)
1566{
1567 int total_phys_segments;
1568 int total_hw_segments;
1569
1570
1571
1572
1573
1574 if (req->special || next->special)
1575 return 0;
1576
1577
1578
1579
1580 if ((req->nr_sectors + next->nr_sectors) > q->max_sectors)
1581 return 0;
1582
1583 total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
1584 if (blk_phys_contig_segment(q, req->biotail, next->bio))
1585 total_phys_segments--;
1586
1587 if (total_phys_segments > q->max_phys_segments)
1588 return 0;
1589
1590 total_hw_segments = req->nr_hw_segments + next->nr_hw_segments;
1591 if (blk_hw_contig_segment(q, req->biotail, next->bio)) {
1592 int len = req->biotail->bi_hw_back_size + next->bio->bi_hw_front_size;
1593
1594
1595
1596 if (req->nr_hw_segments == 1)
1597 req->bio->bi_hw_front_size = len;
1598 if (next->nr_hw_segments == 1)
1599 next->biotail->bi_hw_back_size = len;
1600 total_hw_segments--;
1601 }
1602
1603 if (total_hw_segments > q->max_hw_segments)
1604 return 0;
1605
1606
1607 req->nr_phys_segments = total_phys_segments;
1608 req->nr_hw_segments = total_hw_segments;
1609 return 1;
1610}
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620void blk_plug_device(request_queue_t *q)
1621{
1622 WARN_ON(!irqs_disabled());
1623
1624
1625
1626
1627
1628 if (blk_queue_stopped(q))
1629 return;
1630
1631 if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) {
1632 mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
1633 blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG);
1634 }
1635}
1636
1637EXPORT_SYMBOL(blk_plug_device);
1638
1639
1640
1641
1642
1643int blk_remove_plug(request_queue_t *q)
1644{
1645 WARN_ON(!irqs_disabled());
1646
1647 if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
1648 return 0;
1649
1650 del_timer(&q->unplug_timer);
1651 return 1;
1652}
1653
1654EXPORT_SYMBOL(blk_remove_plug);
1655
1656
1657
1658
1659void __generic_unplug_device(request_queue_t *q)
1660{
1661 if (unlikely(blk_queue_stopped(q)))
1662 return;
1663
1664 if (!blk_remove_plug(q))
1665 return;
1666
1667 q->request_fn(q);
1668}
1669EXPORT_SYMBOL(__generic_unplug_device);
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
/*
 * Locked wrapper around __generic_unplug_device(): takes q->queue_lock
 * with interrupts disabled, unplugs the queue, then releases the lock and
 * re-enables interrupts.
 */
void generic_unplug_device(request_queue_t *q)
{
	spin_lock_irq(q->queue_lock);
	__generic_unplug_device(q);
	spin_unlock_irq(q->queue_lock);
}
EXPORT_SYMBOL(generic_unplug_device);
1689
1690static void blk_backing_dev_unplug(struct backing_dev_info *bdi,
1691 struct page *page)
1692{
1693 request_queue_t *q = bdi->unplug_io_data;
1694
1695
1696
1697
1698 if (q->unplug_fn) {
1699 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
1700 q->rq.count[READ] + q->rq.count[WRITE]);
1701
1702 q->unplug_fn(q);
1703 }
1704}
1705
/*
 * Work handler for q->unplug_work; @data is the request queue.
 * Emits an UNPLUG_IO trace event with the number of allocated read +
 * write requests, then calls the queue's unplug_fn unconditionally.
 */
static void blk_unplug_work(void *data)
{
	request_queue_t *q = data;

	blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
				q->rq.count[READ] + q->rq.count[WRITE]);

	q->unplug_fn(q);
}
1715
/*
 * Timer callback for the unplug timer; @data is the request queue cast
 * to unsigned long.  Emits an UNPLUG_TIMER trace event and hands the
 * actual unplug over to kblockd via q->unplug_work instead of running it
 * here.
 */
static void blk_unplug_timeout(unsigned long data)
{
	request_queue_t *q = (request_queue_t *)data;

	blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL,
				q->rq.count[READ] + q->rq.count[WRITE]);

	kblockd_schedule_work(&q->unplug_work);
}
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735void blk_start_queue(request_queue_t *q)
1736{
1737 WARN_ON(!irqs_disabled());
1738
1739 clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags);
1740
1741
1742
1743
1744
1745 if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
1746 q->request_fn(q);
1747 clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags);
1748 } else {
1749 blk_plug_device(q);
1750 kblockd_schedule_work(&q->unplug_work);
1751 }
1752}
1753
1754EXPORT_SYMBOL(blk_start_queue);
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
/*
 * Stop queue @q: remove any plug (cancelling the unplug timer) and set
 * QUEUE_FLAG_STOPPED.  blk_start_queue() clears the flag again.
 * blk_remove_plug() warns if interrupts are enabled, so this is expected
 * to be called with interrupts disabled.
 */
void blk_stop_queue(request_queue_t *q)
{
	blk_remove_plug(q);
	set_bit(QUEUE_FLAG_STOPPED, &q->queue_flags);
}
EXPORT_SYMBOL(blk_stop_queue);
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
/*
 * Quiesce the deferred unplug machinery of @q: synchronously delete the
 * unplug timer, then flush kblockd so that already-queued unplug work has
 * finished.
 */
void blk_sync_queue(struct request_queue *q)
{
	del_timer_sync(&q->unplug_timer);
	kblockd_flush();
}
EXPORT_SYMBOL(blk_sync_queue);
1797
1798
1799
1800
1801
1802void blk_run_queue(struct request_queue *q)
1803{
1804 unsigned long flags;
1805
1806 spin_lock_irqsave(q->queue_lock, flags);
1807 blk_remove_plug(q);
1808
1809
1810
1811
1812
1813 if (!elv_queue_empty(q)) {
1814 if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
1815 q->request_fn(q);
1816 clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags);
1817 } else {
1818 blk_plug_device(q);
1819 kblockd_schedule_work(&q->unplug_work);
1820 }
1821 }
1822
1823 spin_unlock_irqrestore(q->queue_lock, flags);
1824}
1825EXPORT_SYMBOL(blk_run_queue);
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842static void blk_release_queue(struct kobject *kobj)
1843{
1844 request_queue_t *q = container_of(kobj, struct request_queue, kobj);
1845 struct request_list *rl = &q->rq;
1846
1847 blk_sync_queue(q);
1848
1849 if (rl->rq_pool)
1850 mempool_destroy(rl->rq_pool);
1851
1852 if (q->queue_tags)
1853 __blk_queue_free_tags(q);
1854
1855 if (q->blk_trace)
1856 blk_trace_shutdown(q);
1857
1858 kmem_cache_free(requestq_cachep, q);
1859}
1860
/*
 * Drop a reference on @q by putting its embedded kobject.
 * Counterpart of blk_get_queue().
 */
void blk_put_queue(request_queue_t *q)
{
	kobject_put(&q->kobj);
}
EXPORT_SYMBOL(blk_put_queue);
1866
/*
 * Shut down queue @q: mark it dead, tear down its elevator (if any) and
 * drop a reference.
 */
void blk_cleanup_queue(request_queue_t * q)
{
	/*
	 * QUEUE_FLAG_DEAD is set under sysfs_lock; once set,
	 * blk_get_queue() refuses to hand out new references.
	 */
	mutex_lock(&q->sysfs_lock);
	set_bit(QUEUE_FLAG_DEAD, &q->queue_flags);
	mutex_unlock(&q->sysfs_lock);

	if (q->elevator)
		elevator_exit(q->elevator);

	blk_put_queue(q);
}

EXPORT_SYMBOL(blk_cleanup_queue);
1880
1881static int blk_init_free_list(request_queue_t *q)
1882{
1883 struct request_list *rl = &q->rq;
1884
1885 rl->count[READ] = rl->count[WRITE] = 0;
1886 rl->starved[READ] = rl->starved[WRITE] = 0;
1887 rl->elvpriv = 0;
1888 init_waitqueue_head(&rl->wait[READ]);
1889 init_waitqueue_head(&rl->wait[WRITE]);
1890
1891 rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
1892 mempool_free_slab, request_cachep, q->node);
1893
1894 if (!rl->rq_pool)
1895 return -ENOMEM;
1896
1897 return 0;
1898}
1899
/*
 * Allocate a request queue with no node preference (node id -1).
 * See blk_alloc_queue_node(); returns NULL on allocation failure.
 */
request_queue_t *blk_alloc_queue(gfp_t gfp_mask)
{
	return blk_alloc_queue_node(gfp_mask, -1);
}
EXPORT_SYMBOL(blk_alloc_queue);
1905
/* kobject type used for every request queue's embedded kobject */
static struct kobj_type queue_ktype;

/*
 * Allocate and minimally initialise a request queue from requestq_cachep
 * on node @node_id, using allocation flags @gfp_mask.
 *
 * The queue is zeroed, its unplug timer and embedded kobject (named
 * "queue") are initialised, the backing_dev_info unplug hook is pointed
 * at blk_backing_dev_unplug() with the queue as its data, and sysfs_lock
 * is set up.  Returns the queue, or NULL on allocation failure.
 */
request_queue_t *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
{
	request_queue_t *q;

	q = kmem_cache_alloc_node(requestq_cachep, gfp_mask, node_id);
	if (!q)
		return NULL;

	memset(q, 0, sizeof(*q));
	init_timer(&q->unplug_timer);

	snprintf(q->kobj.name, KOBJ_NAME_LEN, "%s", "queue");
	q->kobj.ktype = &queue_ktype;
	kobject_init(&q->kobj);

	q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
	q->backing_dev_info.unplug_io_data = q;

	mutex_init(&q->sysfs_lock);

	return q;
}
EXPORT_SYMBOL(blk_alloc_queue_node);
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
/*
 * Create a fully set-up request queue with no node preference (node id
 * -1).  See blk_init_queue_node() for the meaning of @rfn and @lock;
 * returns NULL on failure.
 */
request_queue_t *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
{
	return blk_init_queue_node(rfn, lock, -1);
}
EXPORT_SYMBOL(blk_init_queue);
1970
/*
 * Allocate a request queue on node @node_id and prepare it for use.
 *
 * @rfn:     request function stored in q->request_fn
 * @lock:    spinlock to use as q->queue_lock; if NULL the queue's own
 *           __queue_lock is initialised and used instead
 * @node_id: node passed to the queue and request pool allocations
 *
 * Installs the default merge functions, generic_unplug_device() as the
 * unplug function and __make_request() as the make_request function,
 * applies default segment limits and initialises the default elevator.
 * Returns the queue, or NULL on failure.
 */
request_queue_t *
blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
{
	request_queue_t *q = blk_alloc_queue_node(GFP_KERNEL, node_id);

	if (!q)
		return NULL;

	q->node = node_id;
	if (blk_init_free_list(q)) {
		/* no request pool was created: free the queue directly */
		kmem_cache_free(requestq_cachep, q);
		return NULL;
	}

	/* fall back to the spinlock embedded in the queue */
	if (!lock) {
		spin_lock_init(&q->__queue_lock);
		lock = &q->__queue_lock;
	}

	q->request_fn = rfn;
	q->back_merge_fn = ll_back_merge_fn;
	q->front_merge_fn = ll_front_merge_fn;
	q->merge_requests_fn = ll_merge_requests_fn;
	q->prep_rq_fn = NULL;
	q->unplug_fn = generic_unplug_device;
	q->queue_flags = (1 << QUEUE_FLAG_CLUSTER);
	q->queue_lock = lock;

	blk_queue_segment_boundary(q, 0xffffffff);

	blk_queue_make_request(q, __make_request);
	blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE);

	blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
	blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);

	/* elevator_init() returning 0 is the success case */
	if (!elevator_init(q, NULL)) {
		blk_queue_congestion_threshold(q);
		return q;
	}

	/* elevator setup failed: drop the reference so the queue is released */
	blk_put_queue(q);
	return NULL;
}
EXPORT_SYMBOL(blk_init_queue_node);
2023
2024int blk_get_queue(request_queue_t *q)
2025{
2026 if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
2027 kobject_get(&q->kobj);
2028 return 0;
2029 }
2030
2031 return 1;
2032}
2033
2034EXPORT_SYMBOL(blk_get_queue);
2035
/*
 * Return @rq to the queue's request mempool.  If the request carries
 * elevator private data (REQ_ELVPRIV), that is released through
 * elv_put_request() first.
 */
static inline void blk_free_request(request_queue_t *q, struct request *rq)
{
	if (rq->flags & REQ_ELVPRIV)
		elv_put_request(q, rq);
	mempool_free(rq, q->rq.rq_pool);
}
2042
2043static inline struct request *
2044blk_alloc_request(request_queue_t *q, int rw, struct bio *bio,
2045 int priv, gfp_t gfp_mask)
2046{
2047 struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
2048
2049 if (!rq)
2050 return NULL;
2051
2052
2053
2054
2055
2056 rq->flags = rw;
2057
2058 if (priv) {
2059 if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) {
2060 mempool_free(rq, q->rq.rq_pool);
2061 return NULL;
2062 }
2063 rq->flags |= REQ_ELVPRIV;
2064 }
2065
2066 return rq;
2067}
2068
2069
2070
2071
2072
2073static inline int ioc_batching(request_queue_t *q, struct io_context *ioc)
2074{
2075 if (!ioc)
2076 return 0;
2077
2078
2079
2080
2081
2082
2083 return ioc->nr_batch_requests == q->nr_batching ||
2084 (ioc->nr_batch_requests > 0
2085 && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));
2086}
2087
2088
2089
2090
2091
2092
2093
2094static void ioc_set_batching(request_queue_t *q, struct io_context *ioc)
2095{
2096 if (!ioc || ioc_batching(q, ioc))
2097 return;
2098
2099 ioc->nr_batch_requests = q->nr_batching;
2100 ioc->last_waited = jiffies;
2101}
2102
2103static void __freed_request(request_queue_t *q, int rw)
2104{
2105 struct request_list *rl = &q->rq;
2106
2107 if (rl->count[rw] < queue_congestion_off_threshold(q))
2108 clear_queue_congested(q, rw);
2109
2110 if (rl->count[rw] + 1 <= q->nr_requests) {
2111 if (waitqueue_active(&rl->wait[rw]))
2112 wake_up(&rl->wait[rw]);
2113
2114 blk_clear_queue_full(q, rw);
2115 }
2116}
2117
2118
2119
2120
2121
/*
 * Account for one request of direction @rw being freed.
 *
 * Decrements the per-direction count (and the elvpriv count when @priv is
 * non-zero), then runs the congestion/wakeup bookkeeping for @rw.  If the
 * opposite direction is marked starved, the same bookkeeping is run for
 * it as well so that its waiters get a chance.
 */
static void freed_request(request_queue_t *q, int rw, int priv)
{
	struct request_list *rl = &q->rq;

	rl->count[rw]--;
	if (priv)
		rl->elvpriv--;

	__freed_request(q, rw);

	/* rw ^ 1 selects the other data direction */
	if (unlikely(rl->starved[rw ^ 1]))
		__freed_request(q, rw ^ 1);
}
2135
/* Map the first entry (list->next) of @list to its struct request via queuelist. */
#define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist)
2137
2138
2139
2140
2141
/*
 * Try to allocate a request of direction @rw on @q without sleeping on
 * the request wait queue.
 *
 * Expects q->queue_lock to be held on entry (it is dropped with
 * spin_unlock_irq() around the allocation).  On success the initialised
 * request is returned and the lock is NOT held; on failure NULL is
 * returned and the lock is held.
 */
static struct request *get_request(request_queue_t *q, int rw, struct bio *bio,
				   gfp_t gfp_mask)
{
	struct request *rq = NULL;
	struct request_list *rl = &q->rq;
	struct io_context *ioc = NULL;
	int may_queue, priv;

	/* the elevator may veto the allocation outright */
	may_queue = elv_may_queue(q, rw, bio);
	if (may_queue == ELV_MQUEUE_NO)
		goto rq_starved;

	if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) {
		if (rl->count[rw]+1 >= q->nr_requests) {
			/* may be NULL; the ioc helpers below tolerate that */
			ioc = current_io_context(GFP_ATOMIC);

			/*
			 * The task that first fills the queue marks it full
			 * and becomes a batcher.  Once the queue is full,
			 * only batchers and allocations the elevator insists
			 * on (ELV_MQUEUE_MUST) may proceed.
			 */
			if (!blk_queue_full(q, rw)) {
				ioc_set_batching(q, ioc);
				blk_set_queue_full(q, rw);
			} else {
				if (may_queue != ELV_MQUEUE_MUST
						&& !ioc_batching(q, ioc)) {
					/* caller gets NULL with the lock held */
					goto out;
				}
			}
		}
		set_queue_congested(q, rw);
	}

	/*
	 * Hard cap: even allocations let through above are refused once
	 * the count reaches 1.5 times nr_requests.
	 */
	if (rl->count[rw] >= (3 * q->nr_requests / 2))
		goto out;

	rl->count[rw]++;
	rl->starved[rw] = 0;

	/* no elevator private data while QUEUE_FLAG_ELVSWITCH is set */
	priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
	if (priv)
		rl->elvpriv++;

	/* allocate without holding the queue lock */
	spin_unlock_irq(q->queue_lock);

	rq = blk_alloc_request(q, rw, bio, priv, gfp_mask);
	if (unlikely(!rq)) {
		/* undo the accounting done above, under the lock again */
		spin_lock_irq(q->queue_lock);
		freed_request(q, rw, priv);

		/*
		 * With no requests of this direction outstanding, flag the
		 * direction as starved; freed_request() consults this flag
		 * when a request of the other direction is freed.  The
		 * elevator-veto path above jumps here as well.
		 */
rq_starved:
		if (unlikely(rl->count[rw] == 0))
			rl->starved[rw] = 1;

		goto out;
	}

	/* a batcher uses up one unit of its allowance per request */
	if (ioc_batching(q, ioc))
		ioc->nr_batch_requests--;

	rq_init(q, rq);
	rq->rl = rl;

	blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ);
out:
	return rq;
}
2240
2241
2242
2243
2244
2245
2246
/*
 * Allocate a request of direction @rw, sleeping uninterruptibly until
 * one is available.
 *
 * Expects q->queue_lock to be held on entry.  Never returns NULL; on
 * return the lock is not held (get_request() drops it on success).
 */
static struct request *get_request_wait(request_queue_t *q, int rw,
					struct bio *bio)
{
	struct request *rq;

	rq = get_request(q, rw, bio, GFP_NOIO);
	while (!rq) {
		DEFINE_WAIT(wait);
		struct request_list *rl = &q->rq;

		/* queue ourselves before retrying so that no wakeup is missed */
		prepare_to_wait_exclusive(&rl->wait[rw], &wait,
				TASK_UNINTERRUPTIBLE);

		rq = get_request(q, rw, bio, GFP_NOIO);

		if (!rq) {
			struct io_context *ioc;

			blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ);

			/* kick the queue so requests can complete, then sleep */
			__generic_unplug_device(q);
			spin_unlock_irq(q->queue_lock);
			io_schedule();

			/*
			 * Having waited, this task becomes a batcher (see
			 * ioc_set_batching()) before retrying.
			 */
			ioc = current_io_context(GFP_NOIO);
			ioc_set_batching(q, ioc);

			spin_lock_irq(q->queue_lock);
		}
		finish_wait(&rl->wait[rw], &wait);
	}

	return rq;
}
2287
/*
 * Allocate a request of direction @rw (READ or WRITE; anything else is a
 * BUG) from @q.
 *
 * With __GFP_WAIT in @gfp_mask the call sleeps until a request is
 * available and does not return NULL.  Otherwise a single attempt is made
 * with @gfp_mask and NULL is returned on failure.  q->queue_lock is taken
 * here and is not held on return.
 */
struct request *blk_get_request(request_queue_t *q, int rw, gfp_t gfp_mask)
{
	struct request *rq;

	BUG_ON(rw != READ && rw != WRITE);

	spin_lock_irq(q->queue_lock);
	if (gfp_mask & __GFP_WAIT) {
		rq = get_request_wait(q, rw, NULL);
	} else {
		rq = get_request(q, rw, NULL, gfp_mask);
		/* get_request() only drops the lock itself on success */
		if (!rq)
			spin_unlock_irq(q->queue_lock);
	}

	return rq;
}
EXPORT_SYMBOL(blk_get_request);
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
/*
 * Put @rq back on queue @q through the elevator.
 *
 * Emits a REQUEUE trace event and, if the request is tagged, ends its
 * tag before handing it to elv_requeue_request().
 */
void blk_requeue_request(request_queue_t *q, struct request *rq)
{
	blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);

	if (blk_rq_tagged(rq))
		blk_queue_end_tag(q, rq);

	elv_requeue_request(q, rq);
}

EXPORT_SYMBOL(blk_requeue_request);
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
/*
 * Insert a special request into queue @q and start processing.
 *
 * @q:       queue to insert into
 * @rq:      request to insert
 * @at_head: non-zero to insert at the front, zero for the back
 * @data:    stored in rq->special
 *
 * The request is flagged REQ_SPECIAL | REQ_SOFTBARRIER.  Takes
 * q->queue_lock itself, saving and restoring the interrupt state.
 */
void blk_insert_request(request_queue_t *q, struct request *rq,
			int at_head, void *data)
{
	int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
	unsigned long flags;

	rq->flags |= REQ_SPECIAL | REQ_SOFTBARRIER;

	rq->special = data;

	spin_lock_irqsave(q->queue_lock, flags);

	/* a tagged request gives up its tag before being inserted */
	if (blk_rq_tagged(rq))
		blk_queue_end_tag(q, rq);

	drive_stat_acct(rq, rq->nr_sectors, 1);
	__elv_add_request(q, rq, where, 0);

	/* unplug a plugged queue, otherwise run request_fn directly */
	if (blk_queue_plugged(q))
		__generic_unplug_device(q);
	else
		q->request_fn(q);
	spin_unlock_irqrestore(q->queue_lock, flags);
}

EXPORT_SYMBOL(blk_insert_request);
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404int blk_rq_map_user(request_queue_t *q, struct request *rq, void __user *ubuf,
2405 unsigned int len)
2406{
2407 unsigned long uaddr;
2408 struct bio *bio;
2409 int reading;
2410
2411 if (len > (q->max_hw_sectors << 9))
2412 return -EINVAL;
2413 if (!len || !ubuf)
2414 return -EINVAL;
2415
2416 reading = rq_data_dir(rq) == READ;
2417
2418
2419
2420
2421
2422 uaddr = (unsigned long) ubuf;
2423 if (!(uaddr & queue_dma_alignment(q)) && !(len & queue_dma_alignment(q)))
2424 bio = bio_map_user(q, NULL, uaddr, len, reading);
2425 else
2426 bio = bio_copy_user(q, uaddr, len, reading);
2427
2428 if (!IS_ERR(bio)) {
2429 rq->bio = rq->biotail = bio;
2430 blk_rq_bio_prep(q, rq, bio);
2431
2432 rq->buffer = rq->data = NULL;
2433 rq->data_len = len;
2434 return 0;
2435 }
2436
2437
2438
2439
2440 return PTR_ERR(bio);
2441}
2442
2443EXPORT_SYMBOL(blk_rq_map_user);
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
/*
 * Attach the user buffers described by @iov (@iov_count entries) to
 * request @rq using bio_map_user_iov().
 *
 * Returns 0 on success, -EINVAL for a NULL @iov or a non-positive
 * @iov_count, or the error encoded in the bio pointer when the mapping
 * fails.  On success rq->data_len is taken from the bio's size.
 */
int blk_rq_map_user_iov(request_queue_t *q, struct request *rq,
			struct sg_iovec *iov, int iov_count)
{
	struct bio *bio;

	if (!iov || iov_count <= 0)
		return -EINVAL;

	bio = bio_map_user_iov(q, NULL, iov, iov_count, rq_data_dir(rq)== READ);
	if (IS_ERR(bio))
		return PTR_ERR(bio);

	rq->bio = rq->biotail = bio;
	blk_rq_bio_prep(q, rq, bio);
	rq->buffer = rq->data = NULL;
	rq->data_len = bio->bi_size;
	return 0;
}

EXPORT_SYMBOL(blk_rq_map_user_iov);
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497int blk_rq_unmap_user(struct bio *bio, unsigned int ulen)
2498{
2499 int ret = 0;
2500
2501 if (bio) {
2502 if (bio_flagged(bio, BIO_USER_MAPPED))
2503 bio_unmap_user(bio);
2504 else
2505 ret = bio_uncopy_user(bio);
2506 }
2507
2508 return 0;
2509}
2510
2511EXPORT_SYMBOL(blk_rq_unmap_user);
2512
2513
2514
2515
2516
2517
2518
2519
2520
/*
 * Attach the kernel buffer @kbuf of @len bytes to request @rq using
 * bio_map_kern() with allocation flags @gfp_mask.
 *
 * Returns 0 on success, -EINVAL for a NULL buffer, a zero length or a
 * length above q->max_hw_sectors << 9, or the error encoded in the bio
 * pointer when the mapping fails.
 */
int blk_rq_map_kern(request_queue_t *q, struct request *rq, void *kbuf,
		    unsigned int len, gfp_t gfp_mask)
{
	struct bio *bio;

	if (len > (q->max_hw_sectors << 9))
		return -EINVAL;
	if (!len || !kbuf)
		return -EINVAL;

	bio = bio_map_kern(q, kbuf, len, gfp_mask);
	if (IS_ERR(bio))
		return PTR_ERR(bio);

	/* make the bio's direction match a write request */
	if (rq_data_dir(rq) == WRITE)
		bio->bi_rw |= (1 << BIO_RW);

	rq->bio = rq->biotail = bio;
	blk_rq_bio_prep(q, rq, bio);

	rq->buffer = rq->data = NULL;
	rq->data_len = len;
	return 0;
}

EXPORT_SYMBOL(blk_rq_map_kern);
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
/*
 * Queue @rq on @q for execution and return without waiting for it.
 *
 * @q:       queue to insert the request into
 * @bd_disk: stored in rq->rq_disk
 * @rq:      request to insert; it is flagged REQ_NOMERGE
 * @at_head: non-zero to insert at the front, zero for the back
 * @done:    stored in rq->end_io
 *
 * Warns if called with interrupts disabled: the queue lock is taken with
 * spin_lock_irq(), and the matching unlock re-enables interrupts.
 */
void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk,
			   struct request *rq, int at_head,
			   rq_end_io_fn *done)
{
	int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;

	rq->rq_disk = bd_disk;
	rq->flags |= REQ_NOMERGE;
	rq->end_io = done;
	WARN_ON(irqs_disabled());
	spin_lock_irq(q->queue_lock);
	__elv_add_request(q, rq, where, 1);
	__generic_unplug_device(q);
	spin_unlock_irq(q->queue_lock);
}
EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
/*
 * Queue @rq on @q and sleep until it has completed.
 *
 * @q:       queue to insert the request into
 * @bd_disk: passed on to blk_execute_rq_nowait()
 * @rq:      request to execute
 * @at_head: non-zero to insert at the front of the queue
 *
 * Returns 0 on success or -EIO if rq->errors is set after completion.
 */
int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk,
		   struct request *rq, int at_head)
{
	DECLARE_COMPLETION_ONSTACK(wait);
	char sense[SCSI_SENSE_BUFFERSIZE];
	int err = 0;

	/*
	 * Extra reference so that the put done by blk_end_sync_rq() does
	 * not free the request; rq->errors is still read below.
	 */
	rq->ref_count++;

	/*
	 * Supply a sense buffer if the caller did not.
	 * NOTE(review): rq->sense is left pointing at this on-stack buffer
	 * when the function returns - confirm that no caller reads
	 * rq->sense afterwards.
	 */
	if (!rq->sense) {
		memset(sense, 0, sizeof(sense));
		rq->sense = sense;
		rq->sense_len = 0;
	}

	rq->waiting = &wait;
	blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq);
	wait_for_completion(&wait);
	rq->waiting = NULL;

	if (rq->errors)
		err = -EIO;

	return err;
}

EXPORT_SYMBOL(blk_execute_rq);
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
/*
 * Issue a flush for block device @bdev through its queue's
 * issue_flush_fn.
 *
 * @error_sector is passed through to issue_flush_fn unchanged.
 *
 * Returns -ENXIO if the device has no disk or no queue, -EOPNOTSUPP if
 * the queue has no issue_flush_fn, otherwise whatever issue_flush_fn
 * returns.
 */
int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
{
	request_queue_t *q;

	if (bdev->bd_disk == NULL)
		return -ENXIO;

	q = bdev_get_queue(bdev);
	if (!q)
		return -ENXIO;
	if (!q->issue_flush_fn)
		return -EOPNOTSUPP;

	return q->issue_flush_fn(q, bdev->bd_disk, error_sector);
}

EXPORT_SYMBOL(blkdev_issue_flush);
2647
2648static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io)
2649{
2650 int rw = rq_data_dir(rq);
2651
2652 if (!blk_fs_request(rq) || !rq->rq_disk)
2653 return;
2654
2655 if (!new_io) {
2656 __disk_stat_inc(rq->rq_disk, merges[rw]);
2657 } else {
2658 disk_round_stats(rq->rq_disk);
2659 rq->rq_disk->in_flight++;
2660 }
2661}
2662
2663
2664
2665
2666
2667
/*
 * Account for @req as a new I/O, notify the queue's activity callback
 * (if one is set) of the data direction, and hand the request to the
 * elevator for sorted insertion.
 */
static inline void add_request(request_queue_t * q, struct request * req)
{
	drive_stat_acct(req, req->nr_sectors, 1);

	if (q->activity_fn)
		q->activity_fn(q->activity_data, rq_data_dir(req));

	__elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0);
}
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697void disk_round_stats(struct gendisk *disk)
2698{
2699 unsigned long now = jiffies;
2700
2701 if (now == disk->stamp)
2702 return;
2703
2704 if (disk->in_flight) {
2705 __disk_stat_add(disk, time_in_queue,
2706 disk->in_flight * (now - disk->stamp));
2707 __disk_stat_add(disk, io_ticks, (now - disk->stamp));
2708 }
2709 disk->stamp = now;
2710}
2711
2712EXPORT_SYMBOL_GPL(disk_round_stats);
2713
2714
2715
2716
/*
 * Drop one reference on @req; when the last one goes, complete it with
 * the elevator and free it.
 *
 * Takes no lock itself; blk_put_request() is the variant that acquires
 * q->queue_lock around this call.  A NULL @q makes this a no-op.
 */
void __blk_put_request(request_queue_t *q, struct request *req)
{
	struct request_list *rl = req->rl;

	if (unlikely(!q))
		return;
	/* other references remain: nothing more to do */
	if (unlikely(--req->ref_count))
		return;

	elv_completed_request(q, req);

	req->rq_status = RQ_INACTIVE;
	req->rl = NULL;

	/*
	 * Only requests that were attached to a request list are handed
	 * back to the pool and accounted as freed.
	 */
	if (rl) {
		/* sample direction and priv flag before the request is freed */
		int rw = rq_data_dir(req);
		int priv = req->flags & REQ_ELVPRIV;

		BUG_ON(!list_empty(&req->queuelist));

		blk_free_request(q, req);
		freed_request(q, rw, priv);
	}
}

EXPORT_SYMBOL_GPL(__blk_put_request);
2747
2748void blk_put_request(struct request *req)
2749{
2750 unsigned long flags;
2751 request_queue_t *q = req->q;
2752
2753
2754
2755
2756
2757 if (q) {
2758 spin_lock_irqsave(q->queue_lock, flags);
2759 __blk_put_request(q, req);
2760 spin_unlock_irqrestore(q->queue_lock, flags);
2761 }
2762}
2763
2764EXPORT_SYMBOL(blk_put_request);
2765
2766
2767
2768
2769
2770
/*
 * end_io handler used by blk_execute_rq(): drops a reference on @rq and
 * wakes the task waiting on rq->waiting.  @error is not used.
 */
void blk_end_sync_rq(struct request *rq, int error)
{
	/* fetch the completion first; rq must not be touched after the put */
	struct completion *waiting = rq->waiting;

	rq->waiting = NULL;
	__blk_put_request(rq->q, rq);

	complete(waiting);
}
EXPORT_SYMBOL(blk_end_sync_rq);
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
/*
 * Sleep uninterruptibly on the congestion wait queue for direction @rw
 * (used as an index into congestion_wqh[], which has two entries) for at
 * most @timeout jiffies.
 *
 * Returns the value of io_schedule_timeout().
 */
long blk_congestion_wait(int rw, long timeout)
{
	long ret;
	DEFINE_WAIT(wait);
	wait_queue_head_t *wqh = &congestion_wqh[rw];

	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
	ret = io_schedule_timeout(timeout);
	finish_wait(wqh, &wait);
	return ret;
}

EXPORT_SYMBOL(blk_congestion_wait);
2808
2809
2810
2811
/*
 * Try to merge request @next into @req, where @next is expected to
 * follow @req directly on disk.
 *
 * Returns 1 if the requests were merged (and @next was put), 0 if not.
 */
static int attempt_merge(request_queue_t *q, struct request *req,
			 struct request *next)
{
	if (!rq_mergeable(req) || !rq_mergeable(next))
		return 0;

	/* the requests must be sector-contiguous */
	if (req->sector + req->nr_sectors != next->sector)
		return 0;

	/* same direction, same disk, and @next must not be special or waited on */
	if (rq_data_dir(req) != rq_data_dir(next)
	    || req->rq_disk != next->rq_disk
	    || next->waiting || next->special)
		return 0;

	/*
	 * Let the queue's merge function check its limits; on success it
	 * has already updated @req's segment counts.
	 */
	if (!q->merge_requests_fn(q, req, next))
		return 0;

	/* the merged request keeps the earlier of the two start times */
	if (time_after(req->start_time, next->start_time))
		req->start_time = next->start_time;

	/* append @next's bio chain to @req */
	req->biotail->bi_next = next->bio;
	req->biotail = next->biotail;

	req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;

	elv_merge_requests(q, req, next);

	/* two requests became one: one fewer in flight */
	if (req->rq_disk) {
		disk_round_stats(req->rq_disk);
		req->rq_disk->in_flight--;
	}

	req->ioprio = ioprio_best(req->ioprio, next->ioprio);

	__blk_put_request(q, next);
	return 1;
}
2864
2865static inline int attempt_back_merge(request_queue_t *q, struct request *rq)
2866{
2867 struct request *next = elv_latter_request(q, rq);
2868
2869 if (next)
2870 return attempt_merge(q, rq, next);
2871
2872 return 0;
2873}
2874
2875static inline int attempt_front_merge(request_queue_t *q, struct request *rq)
2876{
2877 struct request *prev = elv_former_request(q, rq);
2878
2879 if (prev)
2880 return attempt_merge(q, prev, rq);
2881
2882 return 0;
2883}
2884
/*
 * Fill in request @req from @bio: flags, sector range, segment counts,
 * data buffer, I/O priority, target disk and start time.  @bio becomes
 * the request's only bio.  req->q is read here, so it must already be
 * set.
 */
static void init_request_from_bio(struct request *req, struct bio *bio)
{
	req->flags |= REQ_CMD;

	/* read-ahead and failfast bios both make the request fail fast */
	if (bio_rw_ahead(bio) || bio_failfast(bio))
		req->flags |= REQ_FAILFAST;

	/* a barrier bio makes a hard-barrier request that is never merged */
	if (unlikely(bio_barrier(bio)))
		req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE);

	if (bio_sync(bio))
		req->flags |= REQ_RW_SYNC;

	req->errors = 0;
	req->hard_sector = req->sector = bio->bi_sector;
	req->hard_nr_sectors = req->nr_sectors = bio_sectors(bio);
	req->current_nr_sectors = req->hard_cur_sectors = bio_cur_sectors(bio);
	req->nr_phys_segments = bio_phys_segments(req->q, bio);
	req->nr_hw_segments = bio_hw_segments(req->q, bio);
	req->buffer = bio_data(bio);
	req->waiting = NULL;
	req->bio = req->biotail = bio;
	req->ioprio = bio_prio(bio);
	req->rq_disk = bio->bi_bdev->bd_disk;
	req->start_time = jiffies;
}
2917
2918static int __make_request(request_queue_t *q, struct bio *bio)
2919{
2920 struct request *req;
2921 int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, err, sync;
2922 unsigned short prio;
2923 sector_t sector;
2924
2925 sector = bio->bi_sector;
2926 nr_sectors = bio_sectors(bio);
2927 cur_nr_sectors = bio_cur_sectors(bio);
2928 prio = bio_prio(bio);
2929
2930 rw = bio_data_dir(bio);
2931 sync = bio_sync(bio);
2932
2933
2934
2935
2936
2937
2938 blk_queue_bounce(q, &bio);
2939
2940 spin_lock_prefetch(q->queue_lock);
2941
2942 barrier = bio_barrier(bio);
2943 if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) {
2944 err = -EOPNOTSUPP;
2945 goto end_io;
2946 }
2947
2948 spin_lock_irq(q->queue_lock);
2949
2950 if (unlikely(barrier) || elv_queue_empty(q))
2951 goto get_rq;
2952
2953 el_ret = elv_merge(q, &req, bio);
2954 switch (el_ret) {
2955 case ELEVATOR_BACK_MERGE:
2956 BUG_ON(!rq_mergeable(req));
2957
2958 if (!q->back_merge_fn(q, req, bio))
2959 break;
2960
2961 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
2962
2963 req->biotail->bi_next = bio;
2964 req->biotail = bio;
2965 req->nr_sectors = req->hard_nr_sectors += nr_sectors;
2966 req->ioprio = ioprio_best(req->ioprio, prio);
2967 drive_stat_acct(req, nr_sectors, 0);
2968 if (!attempt_back_merge(q, req))
2969 elv_merged_request(q, req);
2970 goto out;
2971
2972 case ELEVATOR_FRONT_MERGE:
2973 BUG_ON(!rq_mergeable(req));
2974
2975 if (!q->front_merge_fn(q, req, bio))
2976 break;
2977
2978 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);