1
2
3
4
5
6
7
8
9
10#include <linux/kernel.h>
11#include <linux/fs.h>
12#include <linux/mm.h>
13#include <linux/module.h>
14#include <linux/blkdev.h>
15#include <linux/backing-dev.h>
16#include <linux/task_io_accounting_ops.h>
17#include <linux/pagevec.h>
18
/*
 * No-op unplug callback.  It accepts a backing_dev_info and a page and
 * does nothing with either; it is the .unplug_io_fn installed in
 * default_backing_dev_info.
 */
void
default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
{
	/* Intentionally empty. */
}
EXPORT_SYMBOL(default_unplug_io_fn);
23
24struct backing_dev_info default_backing_dev_info = {
25 .ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE,
26 .state = 0,
27 .unplug_io_fn = default_unplug_io_fn,
28};
29EXPORT_SYMBOL_GPL(default_backing_dev_info);
30
31
32
33
34
35void
36file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
37{
38 ra->ra_pages = mapping->backing_dev_info->ra_pages;
39 ra->average = ra->ra_pages / 2;
40}
41
42
43
44
45static inline unsigned long get_max_readahead(struct file_ra_state *ra)
46{
47 return ra->ra_pages;
48}
49
50static inline unsigned long get_min_readahead(struct file_ra_state *ra)
51{
52 return (VM_MIN_READAHEAD * 1024) / PAGE_CACHE_SIZE;
53}
54
/*
 * Given a list head, yield the struct page whose ->lru node is the
 * head's ->prev entry (i.e. the page at the tail end of the list).
 */
#define list_to_page(list) \
	(list_entry((list)->prev, struct page, lru))
56
57
58
59
60
61
62
63
64
65
66
67
68int read_cache_pages(struct address_space *mapping, struct list_head *pages,
69 int (*filler)(void *, struct page *), void *data)
70{
71 struct page *page;
72 struct pagevec lru_pvec;
73 int ret = 0;
74
75 pagevec_init(&lru_pvec, 0);
76
77 while (!list_empty(pages)) {
78 page = list_to_page(pages);
79 list_del(&page->lru);
80 if (add_to_page_cache(page, mapping, page->index, GFP_KERNEL)) {
81 page_cache_release(page);
82 continue;
83 }
84 ret = filler(data, page);
85 if (!pagevec_add(&lru_pvec, page))
86 __pagevec_lru_add(&lru_pvec);
87 if (ret) {
88 while (!list_empty(pages)) {
89 struct page *victim;
90
91 victim = list_to_page(pages);
92 list_del(&victim->lru);
93 page_cache_release(victim);
94 }
95 break;
96 }
97 task_io_account_read(PAGE_CACHE_SIZE);
98 }
99 pagevec_lru_add(&lru_pvec);
100 return ret;
101}
102
103EXPORT_SYMBOL(read_cache_pages);
104
105static int read_pages(struct address_space *mapping, struct file *filp,
106 struct list_head *pages, unsigned nr_pages)
107{
108 unsigned page_idx;
109 struct pagevec lru_pvec;
110 int ret = 0;
111
112 if (mapping->a_ops->readpages) {
113 ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
114 goto out;
115 }
116
117 pagevec_init(&lru_pvec, 0);
118 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
119 struct page *page = list_to_page(pages);
120 list_del(&page->lru);
121 if (!add_to_page_cache(page, mapping,
122 page->index, GFP_KERNEL)) {
123 mapping->a_ops->readpage(filp, page);
124 if (!pagevec_add(&lru_pvec, page))
125 __pagevec_lru_add(&lru_pvec);
126 } else {
127 page_cache_release(page);
128 }
129 }
130 pagevec_lru_add(&lru_pvec);
131out:
132 return ret;
133}
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218static inline int
219__do_page_cache_readahead(struct address_space *mapping, struct file *filp,
220 unsigned long offset, unsigned long nr_to_read)
221{
222 struct inode *inode = mapping->host;
223 struct page *page;
224 unsigned long end_index;
225 LIST_HEAD(page_pool);
226 int page_idx;
227 int ret = 0;
228 loff_t isize = i_size_read(inode);
229
230 if (isize == 0)
231 goto out;
232
233 end_index = ((isize - 1) >> PAGE_CACHE_SHIFT);
234
235
236
237
238 spin_lock_irq(&mapping->tree_lock);
239 for (page_idx = 0; page_idx < nr_to_read; page_idx++) {
240 unsigned long page_offset = offset + page_idx;
241
242 if (page_offset > end_index)
243 break;
244
245 page = radix_tree_lookup(&mapping->page_tree, page_offset);
246 if (page)
247 continue;
248
249 spin_unlock_irq(&mapping->tree_lock);
250 page = page_cache_alloc_cold(mapping);
251 spin_lock_irq(&mapping->tree_lock);
252 if (!page)
253 break;
254 page->index = page_offset;
255 list_add(&page->lru, &page_pool);
256 ret++;
257 }
258 spin_unlock_irq(&mapping->tree_lock);
259
260
261
262
263
264
265 if (ret)
266 read_pages(mapping, filp, &page_pool, ret);
267 BUG_ON(!list_empty(&page_pool));
268out:
269 return ret;
270}
271
272
273
274
275
276int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
277 unsigned long offset, unsigned long nr_to_read)
278{
279 int ret = 0;
280
281 if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages))
282 return -EINVAL;
283
284 while (nr_to_read) {
285 int err;
286
287 unsigned long this_chunk = (2 * 1024 * 1024) / PAGE_CACHE_SIZE;
288
289 if (this_chunk > nr_to_read)
290 this_chunk = nr_to_read;
291 err = __do_page_cache_readahead(mapping, filp,
292 offset, this_chunk);
293 if (err < 0) {
294 ret = err;
295 break;
296 }
297 ret += err;
298 offset += this_chunk;
299 nr_to_read -= this_chunk;
300 }
301 return ret;
302}
303
304
305
306
307
308
309
310
311int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
312 unsigned long offset, unsigned long nr_to_read)
313{
314 if (!bdi_read_congested(mapping->backing_dev_info))
315 return __do_page_cache_readahead(mapping, filp,
316 offset, nr_to_read);
317 return 0;
318}
319
320
321
322
323
324
325
326
327
328static inline void
329check_ra_success(struct file_ra_state *ra, pgoff_t attempt,
330 pgoff_t actual, pgoff_t orig_next_size)
331{
332 if (actual == 0) {
333 if (orig_next_size > 1) {
334 ra->next_size = orig_next_size - 1;
335 if (ra->ahead_size)
336 ra->ahead_size = ra->next_size;
337 } else {
338 ra->next_size = -1UL;
339 ra->size = 0;
340 }
341 }
342}
343
344
345
346
347
348
349
350
351
352
353static inline void
354check_ra_reenable(struct file_ra_state *ra, unsigned long offset,
355 const unsigned long max, unsigned long nr_read)
356{
357 if (offset != ra->prev_page + 1) {
358 ra->size = ra->size?ra->size-1:0;
359 } else {
360 ra->size += nr_read;
361 if (ra->size >= max) {
362 ra->start = offset - max;
363 ra->next_size = max;
364 ra->size = max;
365 ra->ahead_start = 0;
366 ra->ahead_size = 0;
367 ra->average = max / 2;
368 }
369 }
370}
371
372
373
374
375
376unsigned long
377page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra,
378 struct file *filp, unsigned long offset,
379 unsigned long nr_to_read)
380{
381 unsigned max;
382 unsigned orig_next_size;
383 unsigned actual;
384 int first_access=0;
385 unsigned long average;
386
387
388
389
390
391
392
393
394 if (offset == ra->prev_page) {
395 if (ra->next_size != 0)
396 goto out;
397 }
398
399 max = get_max_readahead(ra);
400 if (max == 0)
401 goto out;
402
403 if (ra->next_size == -1UL)
404 goto ra_off;
405
406 orig_next_size = ra->next_size;
407
408 if (ra->next_size == 0) {
409
410
411
412
413
414 first_access=1;
415 ra->next_size = max / 2;
416 ra->prev_page = offset;
417 ra->currnt_wnd_hit++;
418 goto do_io;
419 }
420
421 ra->prev_page = offset;
422
423 if (offset >= ra->start && offset <= (ra->start + ra->size)) {
424
425
426
427
428 ra->next_size += 2;
429
430 if (ra->currnt_wnd_hit <= (max * 2))
431 ra->currnt_wnd_hit++;
432 } else {
433
434
435
436
437 ra->next_size -= 2;
438
439 average = ra->average;
440 if (average < ra->currnt_wnd_hit) {
441 average++;
442 }
443 ra->average = (average + ra->currnt_wnd_hit) / 2;
444 ra->currnt_wnd_hit = 1;
445 }
446
447 if ((long)ra->next_size > (long)max)
448 ra->next_size = max;
449 if ((long)ra->next_size <= 0L) {
450 ra->next_size = -1UL;
451 ra->size = 0;
452 goto ra_off;
453 }
454
455
456
457
458 if (offset < ra->start || offset >= (ra->start + ra->size)) {
459
460
461
462
463 if (offset == ra->ahead_start) {
464
465
466
467
468 ra->start = ra->ahead_start;
469 ra->size = ra->ahead_size;
470 ra->prev_page = ra->start;
471 ra->ahead_start = 0;
472 ra->ahead_size = 0;
473
474
475
476
477
478
479
480
481 goto out;
482 }
483do_io:
484
485
486
487
488
489
490 if (!first_access) {
491
492
493
494
495
496 average = ra->average;
497 if (ra->currnt_wnd_hit > average)
498 average = (ra->currnt_wnd_hit + ra->average + 1) / 2;
499
500 ra->next_size = min(average , (unsigned long)max);
501 }
502 ra->start = offset;
503 ra->size = ra->next_size;
504 if (ra->size < nr_to_read)
505 ra->size = min(nr_to_read, (unsigned long)max);
506 ra->prev_page = offset + ra->size - 1;
507 ra->ahead_start = 0;
508 ra->ahead_size = 0;
509 actual = do_page_cache_readahead(mapping, filp, offset,
510 ra->size);
511 if(!first_access) {
512
513
514
515
516
517 check_ra_success(ra, ra->size, actual, orig_next_size);
518 }
519 } else {
520
521
522
523
524
525 if (ra->ahead_start == 0) {
526
527
528
529
530
531
532
533
534
535 average = ra->average;
536 if (ra->currnt_wnd_hit > average)
537 average = (ra->currnt_wnd_hit + ra->average + 1) / 2;
538
539 if (average > max) {
540 ra->ahead_start = ra->start + ra->size;
541 ra->ahead_size = ra->next_size;
542 actual = do_page_cache_readahead(mapping, filp,
543 ra->ahead_start, ra->ahead_size);
544 check_ra_success(ra, ra->ahead_size,
545 actual, orig_next_size);
546 }
547 }
548 }
549out:
550 return ra->prev_page + 1;
551
552ra_off:
553
554
555
556
557
558
559 if (nr_to_read > 1) {
560 nr_to_read = min(nr_to_read, (unsigned long) max);
561 check_ra_reenable(ra, offset, max, nr_to_read);
562 ra->prev_page = offset + nr_to_read - 1;
563 do_page_cache_readahead(mapping, filp, offset, nr_to_read);
564 }
565 goto out;
566}
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584void handle_ra_miss(struct address_space *mapping,
585 struct file_ra_state *ra, pgoff_t offset)
586{
587 if (ra->next_size == -1UL) {
588 const unsigned long max = get_max_readahead(ra);
589
590 check_ra_reenable(ra, offset, max, 1);
591 ra->prev_page = offset;
592 } else {
593 const unsigned long min = get_min_readahead(ra);
594
595 ra->next_size -= 3;
596 if (ra->next_size < min)
597 ra->next_size = min;
598 }
599}
600
601
602
603
604
/*
 * max_sane_readahead - cap a readahead request by available memory
 * @nr: requested number of pages
 *
 * Queries __get_zone_counts() for the current NUMA node and returns the
 * smaller of @nr and half of (inactive + free) as reported there.  The
 * active count is fetched but not used.
 */
unsigned long max_sane_readahead(unsigned long nr)
{
	unsigned long nr_active;
	unsigned long nr_inactive;
	unsigned long nr_free;
	unsigned long cap;

	__get_zone_counts(&nr_active, &nr_inactive, &nr_free,
			  NODE_DATA(numa_node_id()));

	cap = (nr_inactive + nr_free) / 2;
	return nr < cap ? nr : cap;
}
614