RHEL5/block/elevator.c
<<
>>
Prefs
   1/*
   2 *  Block device elevator/IO-scheduler.
   3 *
   4 *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
   5 *
   6 * 30042000 Jens Axboe <axboe@suse.de> :
   7 *
   8 * Split the elevator a bit so that it is possible to choose a different
   9 * one or even write a new "plug in". There are three pieces:
  10 * - elevator_fn, inserts a new request in the queue list
  11 * - elevator_merge_fn, decides whether a new buffer can be merged with
  12 *   an existing request
  13 * - elevator_dequeue_fn, called when a request is taken off the active list
  14 *
  15 * 20082000 Dave Jones <davej@suse.de> :
  16 * Removed tests for max-bomb-segments, which was breaking elvtune
  17 *  when run without -bN
  18 *
  19 * Jens:
  20 * - Rework again to work with bio instead of buffer_heads
  21 * - loose bi_dev comparisons, partition handling is right now
  22 * - completely modularize elevator setup and teardown
  23 *
  24 */
  25#include <linux/kernel.h>
  26#include <linux/fs.h>
  27#include <linux/blkdev.h>
  28#include <linux/elevator.h>
  29#include <linux/bio.h>
  30#include <linux/module.h>
  31#include <linux/slab.h>
  32#include <linux/init.h>
  33#include <linux/compiler.h>
  34#include <linux/delay.h>
  35#include <linux/blktrace_api.h>
  36
  37#include <asm/uaccess.h>
  38
  39static DEFINE_SPINLOCK(elv_list_lock);
  40static LIST_HEAD(elv_list);
  41
  42/*
  43 * can we safely merge with this request?
  44 */
  45inline int elv_rq_merge_ok(struct request *rq, struct bio *bio)
  46{
  47        if (!rq_mergeable(rq))
  48                return 0;
  49
  50        /*
  51         * different data direction or already started, don't merge
  52         */
  53        if (bio_data_dir(bio) != rq_data_dir(rq))
  54                return 0;
  55
  56        /*
  57         * same device and no special stuff set, merge is ok
  58         */
  59        if (rq->rq_disk == bio->bi_bdev->bd_disk &&
  60            !rq->waiting && !rq->special)
  61                return 1;
  62
  63        return 0;
  64}
  65EXPORT_SYMBOL(elv_rq_merge_ok);
  66
  67static inline int elv_try_merge(struct request *__rq, struct bio *bio)
  68{
  69        int ret = ELEVATOR_NO_MERGE;
  70
  71        /*
  72         * we can merge and sequence is ok, check if it's possible
  73         */
  74        if (elv_rq_merge_ok(__rq, bio)) {
  75                if (__rq->sector + __rq->nr_sectors == bio->bi_sector)
  76                        ret = ELEVATOR_BACK_MERGE;
  77                else if (__rq->sector - bio_sectors(bio) == bio->bi_sector)
  78                        ret = ELEVATOR_FRONT_MERGE;
  79        }
  80
  81        return ret;
  82}
  83
  84static struct elevator_type *elevator_find(const char *name)
  85{
  86        struct elevator_type *e = NULL;
  87        struct list_head *entry;
  88
  89        list_for_each(entry, &elv_list) {
  90                struct elevator_type *__e;
  91
  92                __e = list_entry(entry, struct elevator_type, list);
  93
  94                if (!strcmp(__e->elevator_name, name)) {
  95                        e = __e;
  96                        break;
  97                }
  98        }
  99
 100        return e;
 101}
 102
 103static void elevator_put(struct elevator_type *e)
 104{
 105        module_put(e->elevator_owner);
 106}
 107
 108static struct elevator_type *elevator_get(const char *name)
 109{
 110        struct elevator_type *e;
 111
 112        spin_lock_irq(&elv_list_lock);
 113
 114        e = elevator_find(name);
 115        if (e && !try_module_get(e->elevator_owner))
 116                e = NULL;
 117
 118        spin_unlock_irq(&elv_list_lock);
 119
 120        return e;
 121}
 122
 123static void *elevator_init_queue(request_queue_t *q, struct elevator_queue *eq)
 124{
 125        return eq->ops->elevator_init_fn(q, eq);
 126}
 127
 128static void elevator_attach(request_queue_t *q, struct elevator_queue *eq,
 129                           void *data)
 130{
 131        q->elevator = eq;
 132        eq->elevator_data = data;
 133}
 134
 135static char chosen_elevator[16];
 136
 137static int __init elevator_setup(char *str)
 138{
 139        /*
 140         * Be backwards-compatible with previous kernels, so users
 141         * won't get the wrong elevator.
 142         */
 143        if (!strcmp(str, "as"))
 144                strcpy(chosen_elevator, "anticipatory");
 145        else
 146                strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
 147        return 1;
 148}
 149
 150__setup("elevator=", elevator_setup);
 151
 152static struct kobj_type elv_ktype;
 153
 154static elevator_t *elevator_alloc(struct elevator_type *e)
 155{
 156        elevator_t *eq = kmalloc(sizeof(elevator_t), GFP_KERNEL);
 157        if (eq) {
 158                memset(eq, 0, sizeof(*eq));
 159                eq->ops = &e->ops;
 160                eq->elevator_type = e;
 161                kobject_init(&eq->kobj);
 162                snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched");
 163                eq->kobj.ktype = &elv_ktype;
 164                mutex_init(&eq->sysfs_lock);
 165        } else {
 166                elevator_put(e);
 167        }
 168        return eq;
 169}
 170
 171static void elevator_release(struct kobject *kobj)
 172{
 173        elevator_t *e = container_of(kobj, elevator_t, kobj);
 174        elevator_put(e->elevator_type);
 175        kfree(e);
 176}
 177
 178int elevator_init(request_queue_t *q, char *name)
 179{
 180        struct elevator_type *e = NULL;
 181        struct elevator_queue *eq;
 182        int ret = 0;
 183        void *data;
 184
 185        INIT_LIST_HEAD(&q->queue_head);
 186        q->last_merge = NULL;
 187        q->end_sector = 0;
 188        q->boundary_rq = NULL;
 189
 190        if (name && !(e = elevator_get(name)))
 191                return -EINVAL;
 192
 193        if (!e && *chosen_elevator && !(e = elevator_get(chosen_elevator)))
 194                printk("I/O scheduler %s not found\n", chosen_elevator);
 195
 196        if (!e && !(e = elevator_get(CONFIG_DEFAULT_IOSCHED))) {
 197                printk("Default I/O scheduler not found, using no-op\n");
 198                e = elevator_get("noop");
 199        }
 200
 201        eq = elevator_alloc(e);
 202        if (!eq)
 203                return -ENOMEM;
 204
 205        data = elevator_init_queue(q, eq);
 206        if (!data) {
 207                kobject_put(&eq->kobj);
 208                return -ENOMEM;
 209        }
 210
 211        elevator_attach(q, eq, data);
 212        return ret;
 213}
 214
 215void elevator_exit(elevator_t *e)
 216{
 217        mutex_lock(&e->sysfs_lock);
 218        if (e->ops->elevator_exit_fn)
 219                e->ops->elevator_exit_fn(e);
 220        e->ops = NULL;
 221        mutex_unlock(&e->sysfs_lock);
 222
 223        kobject_put(&e->kobj);
 224}
 225
 226/*
 227 * Insert rq into dispatch queue of q.  Queue lock must be held on
 228 * entry.  If sort != 0, rq is sort-inserted; otherwise, rq will be
 229 * appended to the dispatch queue.  To be used by specific elevators.
 230 */
 231void elv_dispatch_sort(request_queue_t *q, struct request *rq)
 232{
 233        sector_t boundary;
 234        struct list_head *entry;
 235
 236        if (q->last_merge == rq)
 237                q->last_merge = NULL;
 238        q->nr_sorted--;
 239
 240        boundary = q->end_sector;
 241
 242        list_for_each_prev(entry, &q->queue_head) {
 243                struct request *pos = list_entry_rq(entry);
 244
 245                if (pos->flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
 246                        break;
 247                if (rq->sector >= boundary) {
 248                        if (pos->sector < boundary)
 249                                continue;
 250                } else {
 251                        if (pos->sector >= boundary)
 252                                break;
 253                }
 254                if (rq->sector >= pos->sector)
 255                        break;
 256        }
 257
 258        list_add(&rq->queuelist, entry);
 259}
 260
 261int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
 262{
 263        elevator_t *e = q->elevator;
 264        int ret;
 265
 266        if (q->last_merge) {
 267                ret = elv_try_merge(q->last_merge, bio);
 268                if (ret != ELEVATOR_NO_MERGE) {
 269                        *req = q->last_merge;
 270                        return ret;
 271                }
 272        }
 273
 274        if (e->ops->elevator_merge_fn)
 275                return e->ops->elevator_merge_fn(q, req, bio);
 276
 277        return ELEVATOR_NO_MERGE;
 278}
 279
 280void elv_merged_request(request_queue_t *q, struct request *rq)
 281{
 282        elevator_t *e = q->elevator;
 283
 284        if (e->ops->elevator_merged_fn)
 285                e->ops->elevator_merged_fn(q, rq);
 286
 287        q->last_merge = rq;
 288}
 289
 290void elv_merge_requests(request_queue_t *q, struct request *rq,
 291                             struct request *next)
 292{
 293        elevator_t *e = q->elevator;
 294
 295        if (e->ops->elevator_merge_req_fn)
 296                e->ops->elevator_merge_req_fn(q, rq, next);
 297        q->nr_sorted--;
 298
 299        q->last_merge = rq;
 300}
 301
 302void elv_requeue_request(request_queue_t *q, struct request *rq)
 303{
 304        elevator_t *e = q->elevator;
 305
 306        /*
 307         * it already went through dequeue, we need to decrement the
 308         * in_flight count again
 309         */
 310        if (blk_account_rq(rq)) {
 311                q->in_flight--;
 312                if (blk_sorted_rq(rq) && e->ops->elevator_deactivate_req_fn)
 313                        e->ops->elevator_deactivate_req_fn(q, rq);
 314        }
 315
 316        rq->flags &= ~REQ_STARTED;
 317
 318        elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
 319}
 320
 321static void elv_drain_elevator(request_queue_t *q)
 322{
 323        static int printed;
 324        while (q->elevator->ops->elevator_dispatch_fn(q, 1))
 325                ;
 326        if (q->nr_sorted == 0)
 327                return;
 328        if (printed++ < 10) {
 329                printk(KERN_ERR "%s: forced dispatching is broken "
 330                       "(nr_sorted=%u), please report this\n",
 331                       q->elevator->elevator_type->elevator_name, q->nr_sorted);
 332        }
 333}
 334
 335void elv_insert(request_queue_t *q, struct request *rq, int where)
 336{
 337        struct list_head *pos;
 338        unsigned ordseq;
 339        int unplug_it = 1;
 340
 341        blk_add_trace_rq(q, rq, BLK_TA_INSERT);
 342
 343        rq->q = q;
 344
 345        switch (where) {
 346        case ELEVATOR_INSERT_FRONT:
 347                rq->flags |= REQ_SOFTBARRIER;
 348
 349                list_add(&rq->queuelist, &q->queue_head);
 350                break;
 351
 352        case ELEVATOR_INSERT_BACK:
 353                rq->flags |= REQ_SOFTBARRIER;
 354                elv_drain_elevator(q);
 355                list_add_tail(&rq->queuelist, &q->queue_head);
 356                /*
 357                 * We kick the queue here for the following reasons.
 358                 * - The elevator might have returned NULL previously
 359                 *   to delay requests and returned them now.  As the
 360                 *   queue wasn't empty before this request, ll_rw_blk
 361                 *   won't run the queue on return, resulting in hang.
 362                 * - Usually, back inserted requests won't be merged
 363                 *   with anything.  There's no point in delaying queue
 364                 *   processing.
 365                 */
 366                blk_remove_plug(q);
 367                q->request_fn(q);
 368                break;
 369
 370        case ELEVATOR_INSERT_SORT:
 371                BUG_ON(!blk_fs_request(rq));
 372                rq->flags |= REQ_SORTED;
 373                q->nr_sorted++;
 374                if (q->last_merge == NULL && rq_mergeable(rq))
 375                        q->last_merge = rq;
 376                /*
 377                 * Some ioscheds (cfq) run q->request_fn directly, so
 378                 * rq cannot be accessed after calling
 379                 * elevator_add_req_fn.
 380                 */
 381                q->elevator->ops->elevator_add_req_fn(q, rq);
 382                break;
 383
 384        case ELEVATOR_INSERT_REQUEUE:
 385                /*
 386                 * If ordered flush isn't in progress, we do front
 387                 * insertion; otherwise, requests should be requeued
 388                 * in ordseq order.
 389                 */
 390                rq->flags |= REQ_SOFTBARRIER;
 391
 392                if (q->ordseq == 0) {
 393                        list_add(&rq->queuelist, &q->queue_head);
 394                        break;
 395                }
 396
 397                ordseq = blk_ordered_req_seq(rq);
 398
 399                list_for_each(pos, &q->queue_head) {
 400                        struct request *pos_rq = list_entry_rq(pos);
 401                        if (ordseq <= blk_ordered_req_seq(pos_rq))
 402                                break;
 403                }
 404
 405                list_add_tail(&rq->queuelist, pos);
 406                /*
 407                 * most requeues happen because of a busy condition, don't
 408                 * force unplug of the queue for that case.
 409                 */
 410                unplug_it = 0;
 411                break;
 412
 413        default:
 414                printk(KERN_ERR "%s: bad insertion point %d\n",
 415                       __FUNCTION__, where);
 416                BUG();
 417        }
 418
 419        if (unplug_it && blk_queue_plugged(q)) {
 420                int nrq = q->rq.count[READ] + q->rq.count[WRITE]
 421                        - q->in_flight;
 422
 423                if (nrq >= q->unplug_thresh)
 424                        __generic_unplug_device(q);
 425        }
 426}
 427
 428void __elv_add_request(request_queue_t *q, struct request *rq, int where,
 429                       int plug)
 430{
 431        if (q->ordcolor)
 432                rq->flags |= REQ_ORDERED_COLOR;
 433
 434        if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
 435                /*
 436                 * toggle ordered color
 437                 */
 438                if (blk_barrier_rq(rq))
 439                        q->ordcolor ^= 1;
 440
 441                /*
 442                 * barriers implicitly indicate back insertion
 443                 */
 444                if (where == ELEVATOR_INSERT_SORT)
 445                        where = ELEVATOR_INSERT_BACK;
 446
 447                /*
 448                 * this request is scheduling boundary, update
 449                 * end_sector
 450                 */
 451                if (blk_fs_request(rq)) {
 452                        q->end_sector = rq_end_sector(rq);
 453                        q->boundary_rq = rq;
 454                }
 455        } else if (!(rq->flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
 456                where = ELEVATOR_INSERT_BACK;
 457
 458        if (plug)
 459                blk_plug_device(q);
 460
 461        elv_insert(q, rq, where);
 462}
 463
 464void elv_add_request(request_queue_t *q, struct request *rq, int where,
 465                     int plug)
 466{
 467        unsigned long flags;
 468
 469        spin_lock_irqsave(q->queue_lock, flags);
 470        __elv_add_request(q, rq, where, plug);
 471        spin_unlock_irqrestore(q->queue_lock, flags);
 472}
 473
 474static inline struct request *__elv_next_request(request_queue_t *q)
 475{
 476        struct request *rq;
 477
 478        while (1) {
 479                while (!list_empty(&q->queue_head)) {
 480                        rq = list_entry_rq(q->queue_head.next);
 481                        if (blk_do_ordered(q, &rq))
 482                                return rq;
 483                }
 484
 485                if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
 486                        return NULL;
 487        }
 488}
 489
 490struct request *elv_next_request(request_queue_t *q)
 491{
 492        struct request *rq;
 493        int ret;
 494
 495        while ((rq = __elv_next_request(q)) != NULL) {
 496                if (!(rq->flags & REQ_STARTED)) {
 497                        elevator_t *e = q->elevator;
 498
 499                        /*
 500                         * This is the first time the device driver
 501                         * sees this request (possibly after
 502                         * requeueing).  Notify IO scheduler.
 503                         */
 504                        if (blk_sorted_rq(rq) &&
 505                            e->ops->elevator_activate_req_fn)
 506                                e->ops->elevator_activate_req_fn(q, rq);
 507
 508                        /*
 509                         * just mark as started even if we don't start
 510                         * it, a request that has been delayed should
 511                         * not be passed by new incoming requests
 512                         */
 513                        rq->flags |= REQ_STARTED;
 514                        blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
 515                }
 516
 517                if (!q->boundary_rq || q->boundary_rq == rq) {
 518                        q->end_sector = rq_end_sector(rq);
 519                        q->boundary_rq = NULL;
 520                }
 521
 522                if ((rq->flags & REQ_DONTPREP) || !q->prep_rq_fn)
 523                        break;
 524
 525                ret = q->prep_rq_fn(q, rq);
 526                if (ret == BLKPREP_OK) {
 527                        break;
 528                } else if (ret == BLKPREP_DEFER) {
 529                        /*
 530                         * the request may have been (partially) prepped.
 531                         * we need to keep this request in the front to
 532                         * avoid resource deadlock.  REQ_STARTED will
 533                         * prevent other fs requests from passing this one.
 534                         */
 535                        rq = NULL;
 536                        break;
 537                } else if (ret == BLKPREP_KILL) {
 538                        int nr_bytes = rq->hard_nr_sectors << 9;
 539
 540                        if (!nr_bytes)
 541                                nr_bytes = rq->data_len;
 542
 543                        blkdev_dequeue_request(rq);
 544                        rq->flags |= REQ_QUIET;
 545                        end_that_request_chunk(rq, 0, nr_bytes);
 546                        end_that_request_last(rq, 0);
 547                } else {
 548                        printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__,
 549                                                                ret);
 550                        break;
 551                }
 552        }
 553
 554        return rq;
 555}
 556
 557void elv_dequeue_request(request_queue_t *q, struct request *rq)
 558{
 559        BUG_ON(list_empty(&rq->queuelist));
 560
 561        list_del_init(&rq->queuelist);
 562
 563        /*
 564         * the time frame between a request being removed from the lists
 565         * and to it is freed is accounted as io that is in progress at
 566         * the driver side.
 567         */
 568        if (blk_account_rq(rq))
 569                q->in_flight++;
 570}
 571
 572int elv_queue_empty(request_queue_t *q)
 573{
 574        elevator_t *e = q->elevator;
 575
 576        if (!list_empty(&q->queue_head))
 577                return 0;
 578
 579        if (e->ops->elevator_queue_empty_fn)
 580                return e->ops->elevator_queue_empty_fn(q);
 581
 582        return 1;
 583}
 584
 585struct request *elv_latter_request(request_queue_t *q, struct request *rq)
 586{
 587        elevator_t *e = q->elevator;
 588
 589        if (e->ops->elevator_latter_req_fn)
 590                return e->ops->elevator_latter_req_fn(q, rq);
 591        return NULL;
 592}
 593
 594struct request *elv_former_request(request_queue_t *q, struct request *rq)
 595{
 596        elevator_t *e = q->elevator;
 597
 598        if (e->ops->elevator_former_req_fn)
 599                return e->ops->elevator_former_req_fn(q, rq);
 600        return NULL;
 601}
 602
 603int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
 604                    gfp_t gfp_mask)
 605{
 606        elevator_t *e = q->elevator;
 607
 608        if (e->ops->elevator_set_req_fn)
 609                return e->ops->elevator_set_req_fn(q, rq, bio, gfp_mask);
 610
 611        rq->elevator_private = NULL;
 612        return 0;
 613}
 614
 615void elv_put_request(request_queue_t *q, struct request *rq)
 616{
 617        elevator_t *e = q->elevator;
 618
 619        if (e->ops->elevator_put_req_fn)
 620                e->ops->elevator_put_req_fn(q, rq);
 621}
 622
 623int elv_may_queue(request_queue_t *q, int rw, struct bio *bio)
 624{
 625        elevator_t *e = q->elevator;
 626
 627        if (e->ops->elevator_may_queue_fn)
 628                return e->ops->elevator_may_queue_fn(q, rw, bio);
 629
 630        return ELV_MQUEUE_MAY;
 631}
 632
 633void elv_completed_request(request_queue_t *q, struct request *rq)
 634{
 635        elevator_t *e = q->elevator;
 636
 637        /*
 638         * request is released from the driver, io must be done
 639         */
 640        if (blk_account_rq(rq)) {
 641                q->in_flight--;
 642                if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
 643                        e->ops->elevator_completed_req_fn(q, rq);
 644        }
 645
 646        /*
 647         * Check if the queue is waiting for fs requests to be
 648         * drained for flush sequence.
 649         */
 650        if (unlikely(q->ordseq)) {
 651                struct request *first_rq = list_entry_rq(q->queue_head.next);
 652                if (q->in_flight == 0 &&
 653                    blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
 654                    blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) {
 655                        blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
 656                        q->request_fn(q);
 657                }
 658        }
 659}
 660
 661#define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)
 662
 663static ssize_t
 664elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
 665{
 666        elevator_t *e = container_of(kobj, elevator_t, kobj);
 667        struct elv_fs_entry *entry = to_elv(attr);
 668        ssize_t error;
 669
 670        if (!entry->show)
 671                return -EIO;
 672
 673        mutex_lock(&e->sysfs_lock);
 674        error = e->ops ? entry->show(e, page) : -ENOENT;
 675        mutex_unlock(&e->sysfs_lock);
 676        return error;
 677}
 678
 679static ssize_t
 680elv_attr_store(struct kobject *kobj, struct attribute *attr,
 681               const char *page, size_t length)
 682{
 683        elevator_t *e = container_of(kobj, elevator_t, kobj);
 684        struct elv_fs_entry *entry = to_elv(attr);
 685        ssize_t error;
 686
 687        if (!entry->store)
 688                return -EIO;
 689
 690        mutex_lock(&e->sysfs_lock);
 691        error = e->ops ? entry->store(e, page, length) : -ENOENT;
 692        mutex_unlock(&e->sysfs_lock);
 693        return error;
 694}
 695
 696static struct sysfs_ops elv_sysfs_ops = {
 697        .show   = elv_attr_show,
 698        .store  = elv_attr_store,
 699};
 700
 701static struct kobj_type elv_ktype = {
 702        .sysfs_ops      = &elv_sysfs_ops,
 703        .release        = elevator_release,
 704};
 705
 706int elv_register_queue(struct request_queue *q)
 707{
 708        elevator_t *e = q->elevator;
 709        int error;
 710
 711        e->kobj.parent = &q->kobj;
 712
 713        error = kobject_add(&e->kobj);
 714        if (!error) {
 715                struct elv_fs_entry *attr = e->elevator_type->elevator_attrs;
 716                if (attr) {
 717                        while (attr->attr.name) {
 718                                if (sysfs_create_file(&e->kobj, &attr->attr))
 719                                        break;
 720                                attr++;
 721                        }
 722                }
 723                kobject_uevent(&e->kobj, KOBJ_ADD);
 724        }
 725        return error;
 726}
 727
 728static void __elv_unregister_queue(elevator_t *e)
 729{
 730        kobject_uevent(&e->kobj, KOBJ_REMOVE);
 731        kobject_del(&e->kobj);
 732}
 733
 734void elv_unregister_queue(struct request_queue *q)
 735{
 736        if (q)
 737                __elv_unregister_queue(q->elevator);
 738}
 739
 740int elv_register(struct elevator_type *e)
 741{
 742        spin_lock_irq(&elv_list_lock);
 743        BUG_ON(elevator_find(e->elevator_name));
 744        list_add_tail(&e->list, &elv_list);
 745        spin_unlock_irq(&elv_list_lock);
 746
 747        printk(KERN_INFO "io scheduler %s registered", e->elevator_name);
 748        if (!strcmp(e->elevator_name, chosen_elevator) ||
 749                        (!*chosen_elevator &&
 750                         !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED)))
 751                                printk(" (default)");
 752        printk("\n");
 753        return 0;
 754}
 755EXPORT_SYMBOL_GPL(elv_register);
 756
 757void elv_unregister(struct elevator_type *e)
 758{
 759        struct task_struct *g, *p;
 760
 761        /*
 762         * Iterate every thread in the process to remove the io contexts.
 763         */
 764        if (e->ops.trim) {
 765                read_lock(&tasklist_lock);
 766                do_each_thread(g, p) {
 767                        task_lock(p);
 768                        if (p->io_context)
 769                                e->ops.trim(p->io_context);
 770                        task_unlock(p);
 771                } while_each_thread(g, p);
 772                read_unlock(&tasklist_lock);
 773        }
 774
 775        spin_lock_irq(&elv_list_lock);
 776        list_del_init(&e->list);
 777        spin_unlock_irq(&elv_list_lock);
 778}
 779EXPORT_SYMBOL_GPL(elv_unregister);
 780
 781/*
 782 * switch to new_e io scheduler. be careful not to introduce deadlocks -
 783 * we don't free the old io scheduler, before we have allocated what we
 784 * need for the new one. this way we have a chance of going back to the old
 785 * one, if the new one fails init for some reason.
 786 */
 787static int elevator_switch(request_queue_t *q, struct elevator_type *new_e)
 788{
 789        elevator_t *old_elevator, *e;
 790        void *data;
 791
 792        /*
 793         * Allocate new elevator
 794         */
 795        e = elevator_alloc(new_e);
 796        if (!e)
 797                return 0;
 798
 799        data = elevator_init_queue(q, e);
 800        if (!data) {
 801                kobject_put(&e->kobj);
 802                return 0;
 803        }
 804
 805        /*
 806         * Turn on BYPASS and drain all requests w/ elevator private data
 807         */
 808        spin_lock_irq(q->queue_lock);
 809
 810        set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
 811
 812        elv_drain_elevator(q);
 813
 814        while (q->rq.elvpriv) {
 815                blk_remove_plug(q);
 816                q->request_fn(q);
 817                spin_unlock_irq(q->queue_lock);
 818                msleep(10);
 819                spin_lock_irq(q->queue_lock);
 820                elv_drain_elevator(q);
 821        }
 822
 823        /*
 824         * Remember old elevator.
 825         */
 826        old_elevator = q->elevator;
 827
 828        /*
 829         * attach and start new elevator
 830         */
 831        elevator_attach(q, e, data);
 832
 833        spin_unlock_irq(q->queue_lock);
 834
 835        __elv_unregister_queue(old_elevator);
 836
 837        if (elv_register_queue(q))
 838                goto fail_register;
 839
 840        /*
 841         * finally exit old elevator and turn off BYPASS.
 842         */
 843        elevator_exit(old_elevator);
 844        clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
 845        return 1;
 846
 847fail_register:
 848        /*
 849         * switch failed, exit the new io scheduler and reattach the old
 850         * one again (along with re-adding the sysfs dir)
 851         */
 852        elevator_exit(e);
 853        q->elevator = old_elevator;
 854        elv_register_queue(q);
 855        clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
 856        return 0;
 857}
 858
 859ssize_t elv_iosched_store(request_queue_t *q, const char *name, size_t count)
 860{
 861        char elevator_name[ELV_NAME_MAX];
 862        size_t len;
 863        struct elevator_type *e;
 864
 865        elevator_name[sizeof(elevator_name) - 1] = '\0';
 866        strncpy(elevator_name, name, sizeof(elevator_name) - 1);
 867        len = strlen(elevator_name);
 868
 869        if (len && elevator_name[len - 1] == '\n')
 870                elevator_name[len - 1] = '\0';
 871
 872        e = elevator_get(elevator_name);
 873        if (!e) {
 874                printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
 875                return -EINVAL;
 876        }
 877
 878        if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) {
 879                elevator_put(e);
 880                return count;
 881        }
 882
 883        if (!elevator_switch(q, e))
 884                printk(KERN_ERR "elevator: switch to %s failed\n",elevator_name);
 885        return count;
 886}
 887
 888ssize_t elv_iosched_show(request_queue_t *q, char *name)
 889{
 890        elevator_t *e = q->elevator;
 891        struct elevator_type *elv = e->elevator_type;
 892        struct list_head *entry;
 893        int len = 0;
 894
 895        spin_lock_irq(&elv_list_lock);
 896        list_for_each(entry, &elv_list) {
 897                struct elevator_type *__e;
 898
 899                __e = list_entry(entry, struct elevator_type, list);
 900                if (!strcmp(elv->elevator_name, __e->elevator_name))
 901                        len += sprintf(name+len, "[%s] ", elv->elevator_name);
 902                else
 903                        len += sprintf(name+len, "%s ", __e->elevator_name);
 904        }
 905        spin_unlock_irq(&elv_list_lock);
 906
 907        len += sprintf(len+name, "\n");
 908        return len;
 909}
 910
 911EXPORT_SYMBOL(elv_dispatch_sort);
 912EXPORT_SYMBOL(elv_add_request);
 913EXPORT_SYMBOL(__elv_add_request);
 914EXPORT_SYMBOL(elv_next_request);
 915EXPORT_SYMBOL(elv_dequeue_request);
 916EXPORT_SYMBOL(elv_queue_empty);
 917EXPORT_SYMBOL(elevator_exit);
 918EXPORT_SYMBOL(elevator_init);
 919