RHEL4/kernel/pid.c
<<
>>
Prefs
   1/*
   2 * Generic pidhash and scalable, time-bounded PID allocator
   3 *
   4 * (C) 2002 William Irwin, IBM
   5 * (C) 2002 Ingo Molnar, Red Hat
   6 *
   7 * pid-structures are backing objects for tasks sharing a given ID to chain
   8 * against. There is very little to them aside from hashing them and
   9 * parking tasks using given ID's on a list.
  10 *
  11 * The hash is always changed with the tasklist_lock write-acquired,
  12 * and the hash is only accessed with the tasklist_lock at least
  13 * read-acquired, so there's no additional SMP locking needed here.
  14 *
  15 * We have a list of bitmap pages, which bitmaps represent the PID space.
  16 * Allocating and freeing PIDs is completely lockless. The worst-case
  17 * allocation scenario when all but one out of 1 million PIDs possible are
  18 * allocated already: the scanning of 32 list entries and at most PAGE_SIZE
  19 * bytes. The typical fastpath is a single successful setbit. Freeing is O(1).
  20 */
  21
  22#include <linux/mm.h>
  23#include <linux/module.h>
  24#include <linux/slab.h>
  25#include <linux/init.h>
  26#include <linux/bootmem.h>
  27#include <linux/hash.h>
  28
  29#define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift)
  30static struct hlist_head *pid_hash[PIDTYPE_MAX];
  31static int pidhash_shift;
  32
  33int pid_max = PID_MAX_DEFAULT;
  34int last_pid;
  35
  36#define RESERVED_PIDS           300
  37
  38#define PIDMAP_ENTRIES          (PID_MAX_LIMIT/PAGE_SIZE/8)
  39#define BITS_PER_PAGE           (PAGE_SIZE*8)
  40#define BITS_PER_PAGE_MASK      (BITS_PER_PAGE-1)
  41#define find_next_offset(map, off)                                      \
  42                find_next_zero_bit((map)->page, BITS_PER_PAGE, off)
  43
  44/*
  45 * PID-map pages start out as NULL, they get allocated upon
  46 * first use and are never deallocated. This way a low pid_max
  47 * value does not cause lots of bitmaps to be allocated, but
  48 * the scheme scales to up to 4 million PIDs, runtime.
  49 */
  50typedef struct pidmap {
  51        atomic_t nr_free;
  52        void *page;
  53} pidmap_t;
  54
  55static pidmap_t pidmap_array[PIDMAP_ENTRIES] =
  56         { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } };
  57
  58static spinlock_t pidmap_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
  59
  60static inline int mk_pid(struct pidmap *map, int off)
  61{
  62        return (map - pidmap_array)*BITS_PER_PAGE + off;
  63}
  64
  65fastcall void free_pidmap(int pid)
  66{
  67        pidmap_t *map = pidmap_array + pid / BITS_PER_PAGE;
  68        int offset = pid & BITS_PER_PAGE_MASK;
  69
  70        clear_bit(offset, map->page);
  71        atomic_inc(&map->nr_free);
  72}
  73
  74int alloc_pidmap(void)
  75{
  76        int i, offset, max_scan, pid, last = last_pid;
  77        pidmap_t *map;
  78
  79        pid = last + 1;
  80        if (pid >= pid_max)
  81                pid = RESERVED_PIDS;
  82        offset = pid & BITS_PER_PAGE_MASK;
  83        map = &pidmap_array[pid/BITS_PER_PAGE];
  84        max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
  85        for (i = 0; i <= max_scan; ++i) {
  86                if (unlikely(!map->page)) {
  87                        void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);
  88                        /*
  89                         * Free the page if someone raced with us
  90                         * installing it:
  91                         */
  92                        spin_lock(&pidmap_lock);
  93                        if (map->page)
  94                                kfree(page);
  95                        else
  96                                map->page = page;
  97                        spin_unlock(&pidmap_lock);
  98                        if (unlikely(!map->page))
  99                                break;
 100                }
 101                if (likely(atomic_read(&map->nr_free))) {
 102                        do {
 103                                if (!test_and_set_bit(offset, map->page)) {
 104                                        atomic_dec(&map->nr_free);
 105                                        last_pid = pid;
 106                                        return pid;
 107                                }
 108                                offset = find_next_offset(map, offset);
 109                                pid = mk_pid(map, offset);
 110                        /*
 111                         * find_next_offset() found a bit, the pid from it
 112                         * is in-bounds, and if we fell back to the last
 113                         * bitmap block and the final block was the same
 114                         * as the starting point, pid is before last_pid.
 115                         */
 116                        } while (offset < BITS_PER_PAGE && pid < pid_max &&
 117                                        (i != max_scan || pid < last ||
 118                                            !((last+1) & BITS_PER_PAGE_MASK)));
 119                }
 120                if (map < &pidmap_array[(pid_max-1)/BITS_PER_PAGE]) {
 121                        ++map;
 122                        offset = 0;
 123                } else {
 124                        map = &pidmap_array[0];
 125                        offset = RESERVED_PIDS;
 126                        if (unlikely(last == offset))
 127                                break;
 128                }
 129                pid = mk_pid(map, offset);
 130        }
 131        return -1;
 132}
 133
 134static int next_pidmap(int last)
 135{
 136        int offset;
 137        pidmap_t *map;
 138
 139        offset = (last + 1) & BITS_PER_PAGE_MASK;
 140        map = &pidmap_array[(last + 1)/BITS_PER_PAGE];
 141        for (; map < &pidmap_array[PIDMAP_ENTRIES]; map++, offset = 0) {
 142                if (unlikely(!map->page))
 143                        continue;
 144                offset = find_next_bit(map->page, BITS_PER_PAGE, offset);
 145                if (offset < BITS_PER_PAGE)
 146                        return mk_pid(map, offset);
 147        }
 148        return -1;
 149}
 150
 151struct pid * fastcall find_pid(enum pid_type type, int nr)
 152{
 153        struct hlist_node *elem;
 154        struct pid *pid;
 155
 156        hlist_for_each_entry(pid, elem,
 157                        &pid_hash[type][pid_hashfn(nr)], pid_chain) {
 158                if (pid->nr == nr)
 159                        return pid;
 160        }
 161        return NULL;
 162}
 163
 164int fastcall attach_pid(task_t *task, enum pid_type type, int nr)
 165{
 166        struct pid *pid, *task_pid;
 167
 168        task_pid = &task->pids[type];
 169        pid = find_pid(type, nr);
 170        if (pid == NULL) {
 171                hlist_add_head(&task_pid->pid_chain,
 172                                &pid_hash[type][pid_hashfn(nr)]);
 173                INIT_LIST_HEAD(&task_pid->pid_list);
 174        } else {
 175                INIT_HLIST_NODE(&task_pid->pid_chain);
 176                list_add_tail(&task_pid->pid_list, &pid->pid_list);
 177        }
 178        task_pid->nr = nr;
 179
 180        return 0;
 181}
 182
 183static inline int __detach_pid(task_t *task, enum pid_type type)
 184{
 185        struct pid *pid, *pid_next;
 186        int nr;
 187
 188        pid = &task->pids[type];
 189        if (!hlist_unhashed(&pid->pid_chain)) {
 190                hlist_del(&pid->pid_chain);
 191                if (!list_empty(&pid->pid_list)) {
 192                        pid_next = list_entry(pid->pid_list.next,
 193                                                struct pid, pid_list);
 194                        /* insert next pid from pid_list to hash */
 195                        hlist_add_head(&pid_next->pid_chain,
 196                                &pid_hash[type][pid_hashfn(pid_next->nr)]);
 197                }
 198        }
 199        list_del(&pid->pid_list);
 200        nr = pid->nr;
 201        pid->nr = 0;
 202
 203        return nr;
 204}
 205
 206void fastcall detach_pid(task_t *task, enum pid_type type)
 207{
 208        int nr;
 209
 210        nr = __detach_pid(task, type);
 211        if (!nr)
 212                return;
 213
 214        for (type = 0; type < PIDTYPE_MAX; ++type)
 215                if (find_pid(type, nr))
 216                        return;
 217        free_pidmap(nr);
 218}
 219
 220task_t *find_task_by_pid_type(int type, int nr)
 221{
 222        struct pid *pid;
 223
 224        pid = find_pid(type, nr);
 225        if (!pid)
 226                return NULL;
 227
 228        return pid_task(&pid->pid_list, type);
 229}
 230
 231EXPORT_SYMBOL(find_task_by_pid_type);
 232
 233/*
 234 * This function switches the PIDs if a non-leader thread calls
 235 * sys_execve() - this must be done without releasing the PID.
 236 * (which a detach_pid() would eventually do.)
 237 */
 238void switch_exec_pids(task_t *leader, task_t *thread)
 239{
 240        __detach_pid(leader, PIDTYPE_PID);
 241        __detach_pid(leader, PIDTYPE_TGID);
 242        __detach_pid(leader, PIDTYPE_PGID);
 243        __detach_pid(leader, PIDTYPE_SID);
 244
 245        __detach_pid(thread, PIDTYPE_PID);
 246        __detach_pid(thread, PIDTYPE_TGID);
 247
 248        leader->pid = leader->tgid = thread->pid;
 249        thread->pid = thread->tgid;
 250
 251        attach_pid(thread, PIDTYPE_PID, thread->pid);
 252        attach_pid(thread, PIDTYPE_TGID, thread->tgid);
 253        attach_pid(thread, PIDTYPE_PGID, thread->signal->pgrp);
 254        attach_pid(thread, PIDTYPE_SID, thread->signal->session);
 255        list_add_tail(&thread->tasks, &init_task.tasks);
 256
 257        attach_pid(leader, PIDTYPE_PID, leader->pid);
 258        attach_pid(leader, PIDTYPE_TGID, leader->tgid);
 259        attach_pid(leader, PIDTYPE_PGID, leader->signal->pgrp);
 260        attach_pid(leader, PIDTYPE_SID, leader->signal->session);
 261}
 262
 263/**
 264 * pid_alive - check that a task structure is not stale
 265 * @p: Task structure to be checked.
 266 *
 267 * Test if a process is not yet dead (at most zombie state)
 268 * If pid_alive fails, then pointers within the task structure
 269 * can be stale and must not be dereferenced.
 270 */
 271int pid_alive(struct task_struct *p)
 272{
 273        return p->pids[PIDTYPE_PID].nr != 0;
 274}
 275
 276/*
 277 * Used by proc to find the first pid that is greater then or equal to nr.
 278 *
 279 * If there is a pid at nr this function is exactly the same as find_pid.
 280 */
 281struct pid *find_ge_pid(int nr)
 282{
 283        struct pid *pid;
 284
 285        if (nr == 0)
 286                nr = 1;
 287
 288        do {
 289                pid = find_pid(PIDTYPE_PID, nr);
 290                if (pid)
 291                        break;
 292                nr = next_pidmap(nr);
 293        } while (nr > 0);
 294
 295        return pid;
 296}
 297
 298/*
 299 * The pid hash table is scaled according to the amount of memory in the
 300 * machine.  From a minimum of 16 slots up to 4096 slots at one gigabyte or
 301 * more.
 302 */
 303void __init pidhash_init(void)
 304{
 305        int i, j, pidhash_size;
 306        unsigned long megabytes = nr_kernel_pages >> (20 - PAGE_SHIFT);
 307
 308        pidhash_shift = max(10, fls(megabytes * 4));
 309        pidhash_shift = min(12, pidhash_shift);
 310        pidhash_size = 1 << pidhash_shift;
 311
 312        printk("PID hash table entries: %d (order: %d, %Zd bytes)\n",
 313                pidhash_size, pidhash_shift,
 314                PIDTYPE_MAX * pidhash_size * sizeof(struct hlist_head));
 315
 316        for (i = 0; i < PIDTYPE_MAX; i++) {
 317                pid_hash[i] = alloc_bootmem(pidhash_size *
 318                                        sizeof(*(pid_hash[i])));
 319                if (!pid_hash[i])
 320                        panic("Could not alloc pidhash!\n");
 321                for (j = 0; j < pidhash_size; j++)
 322                        INIT_HLIST_HEAD(&pid_hash[i][j]);
 323        }
 324}
 325
 326void __init pidmap_init(void)
 327{
 328        int i;
 329
 330        pidmap_array->page = (void *)get_zeroed_page(GFP_KERNEL);
 331        set_bit(0, pidmap_array->page);
 332        atomic_dec(&pidmap_array->nr_free);
 333
 334        /*
 335         * Allocate PID 0, and hash it via all PID types:
 336         */
 337
 338        for (i = 0; i < PIDTYPE_MAX; i++)
 339                attach_pid(current, i, 0);
 340}
 341