/* Source listing: RHEL4/fs/binfmt_elf.c */
   1/*
   2 * linux/fs/binfmt_elf.c
   3 *
   4 * These are the functions used to load ELF format executables as used
   5 * on SVr4 machines.  Information on the format may be found in the book
   6 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
   7 * Tools".
   8 *
   9 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
  10 */
  11
  12#include <linux/module.h>
  13#include <linux/kernel.h>
  14#include <linux/fs.h>
  15#include <linux/stat.h>
  16#include <linux/time.h>
  17#include <linux/mm.h>
  18#include <linux/mman.h>
  19#include <linux/a.out.h>
  20#include <linux/errno.h>
  21#include <linux/signal.h>
  22#include <linux/binfmts.h>
  23#include <linux/string.h>
  24#include <linux/file.h>
  25#include <linux/fcntl.h>
  26#include <linux/ptrace.h>
  27#include <linux/slab.h>
  28#include <linux/shm.h>
  29#include <linux/personality.h>
  30#include <linux/elfcore.h>
  31#include <linux/init.h>
  32#include <linux/highuid.h>
  33#include <linux/smp.h>
  34#include <linux/smp_lock.h>
  35#include <linux/compiler.h>
  36#include <linux/highmem.h>
  37#include <linux/pagemap.h>
  38#include <linux/security.h>
  39#include <linux/syscalls.h>
  40
  41#include <asm/uaccess.h>
  42#include <asm/param.h>
  43#include <asm/page.h>
  44
  45#include <linux/elf.h>
  46
  47static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs);
  48static int load_elf_library(struct file*);
  49static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int, unsigned long);
  50extern int dump_fpu (struct pt_regs *, elf_fpregset_t *);
  51
  /*
   * Type of one slot written to the new process's stack / aux vector.
   * Defaults to unsigned long unless something earlier already defined it.
   */
  #if !defined(elf_addr_t)
  # define elf_addr_t unsigned long
  #endif
  55
  /*
   * Core dumping is optional.  When USE_ELF_CORE_DUMP is not defined the
   * handler name expands to NULL, so the format descriptor below carries
   * no core_dump hook at all.
   */
  #if defined(USE_ELF_CORE_DUMP)
  static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file);
  #else
  # define elf_core_dump  NULL
  #endif
  65
  /*
   * ELF_MIN_ALIGN is the larger of ELF_EXEC_PAGESIZE and PAGE_SIZE; every
   * rounding helper below works at that granularity.
   */
  #if PAGE_SIZE < ELF_EXEC_PAGESIZE
  # define ELF_MIN_ALIGN  ELF_EXEC_PAGESIZE
  #else
  # define ELF_MIN_ALIGN  PAGE_SIZE
  #endif

  /* Round v down to an ELF_MIN_ALIGN boundary. */
  #define ELF_PAGESTART(v)   ((v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
  /* Offset of v inside its ELF_MIN_ALIGN-sized page. */
  #define ELF_PAGEOFFSET(v)  ((v) & (ELF_MIN_ALIGN-1))
  /* Round v up to the next ELF_MIN_ALIGN boundary (unchanged if aligned). */
  #define ELF_PAGEALIGN(v)   (((v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
  75
  76static struct linux_binfmt elf_format = {
  77                .module         = THIS_MODULE,
  78                .load_binary    = load_elf_binary,
  79                .load_shlib     = load_elf_library,
  80                .core_dump      = elf_core_dump,
  81                .min_coredump   = ELF_EXEC_PAGESIZE
  82};
  83
  /*
   * True when addr, viewed as an unsigned long, lies at or above TASK_SIZE.
   * Used in this file both to reject out-of-range addresses and to detect
   * error values returned in place of an address.
   */
  #define BAD_ADDR(addr)  ((unsigned long)(addr) >= TASK_SIZE)
  85
  86static int set_brk(unsigned long start, unsigned long end)
  87{
  88        start = ELF_PAGEALIGN(start);
  89        end = ELF_PAGEALIGN(end);
  90        if (end > start) {
  91                unsigned long addr = do_brk_locked(start, end - start);
  92                if (BAD_ADDR(addr))
  93                        return addr;
  94        }
  95        current->mm->start_brk = current->mm->brk = end;
  96        return 0;
  97}
  98
  99
 100/* We need to explicitly zero any fractional pages
 101   after the data section (i.e. bss).  This would
 102   contain the junk from the file that should not
 103   be in memory */
 104
 105
 106static void padzero(unsigned long elf_bss)
 107{
 108        unsigned long nbyte;
 109
 110        nbyte = ELF_PAGEOFFSET(elf_bss);
 111        if (nbyte) {
 112                nbyte = ELF_MIN_ALIGN - nbyte;
 113                clear_user((void __user *) elf_bss, nbyte);
 114        }
 115}
 116
 /*
  * Let's use some macros to make this stack manipulation a little clearer.
  *
  * STACK_ADD(sp, items)   - address 'items' elf_addr_t slots further along
  *                          the stack in its growth direction.
  * STACK_ROUND(sp, items) - address 'items' slots further along, rounded to
  *                          a 16-byte boundary, as an unsigned long.
  * STACK_ALLOC(sp, len)   - reserve 'len' units on the stack by updating sp
  *                          in place; yields the start of the reserved area.
  *
  * Every macro argument is parenthesized so that compound expressions
  * (e.g. "argc + envc") are evaluated as a unit.  Note that sp is still
  * evaluated more than once by STACK_ALLOC.
  */
 #ifdef CONFIG_STACK_GROWSUP
 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
 #define STACK_ROUND(sp, items) \
         ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
 #define STACK_ALLOC(sp, len) ({ elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); (sp) += (len); old_sp; })
 #else
 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
 #define STACK_ROUND(sp, items) \
         (((unsigned long) ((sp) - (items))) &~ 15UL)
 #define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
 #endif
 129
 130static void
 131create_elf_tables(struct linux_binprm *bprm, struct elfhdr * exec,
 132                int interp_aout, unsigned long load_addr,
 133                unsigned long interp_load_addr)
 134{
 135        unsigned long p = bprm->p;
 136        int argc = bprm->argc;
 137        int envc = bprm->envc;
 138        elf_addr_t __user *argv;
 139        elf_addr_t __user *envp;
 140        elf_addr_t __user *sp;
 141        elf_addr_t __user *u_platform;
 142        const char *k_platform = ELF_PLATFORM;
 143        int items;
 144        elf_addr_t *elf_info;
 145        int ei_index = 0;
 146        struct task_struct *tsk = current;
 147
 148        /*
 149         * If this architecture has a platform capability string, copy it
 150         * to userspace.  In some cases (Sparc), this info is impossible
 151         * for userspace to get any other way, in others (i386) it is
 152         * merely difficult.
 153         */
 154
 155        u_platform = NULL;
 156        if (k_platform) {
 157                size_t len = strlen(k_platform) + 1;
 158
 159#ifdef __HAVE_ARCH_ALIGN_STACK
 160                p = (unsigned long)arch_align_stack((unsigned long)p);
 161#endif
 162                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
 163                __copy_to_user(u_platform, k_platform, len);
 164        }
 165
 166        /* Create the ELF interpreter info */
 167        elf_info = (elf_addr_t *) current->mm->saved_auxv;
 168#define NEW_AUX_ENT(id, val) \
 169        do { elf_info[ei_index++] = id; elf_info[ei_index++] = val; } while (0)
 170
 171#ifdef ARCH_DLINFO
 172        /* 
 173         * ARCH_DLINFO must come first so PPC can do its special alignment of
 174         * AUXV.
 175         */
 176        ARCH_DLINFO;
 177#endif
 178        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
 179        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
 180        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
 181        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
 182        NEW_AUX_ENT(AT_PHENT, sizeof (struct elf_phdr));
 183        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
 184        NEW_AUX_ENT(AT_BASE, interp_load_addr);
 185        NEW_AUX_ENT(AT_FLAGS, 0);
 186        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
 187        NEW_AUX_ENT(AT_UID, (elf_addr_t) tsk->uid);
 188        NEW_AUX_ENT(AT_EUID, (elf_addr_t) tsk->euid);
 189        NEW_AUX_ENT(AT_GID, (elf_addr_t) tsk->gid);
 190        NEW_AUX_ENT(AT_EGID, (elf_addr_t) tsk->egid);
 191        NEW_AUX_ENT(AT_SECURE, (elf_addr_t) security_bprm_secureexec(bprm));
 192        if (k_platform) {
 193                NEW_AUX_ENT(AT_PLATFORM, (elf_addr_t)(unsigned long)u_platform);
 194        }
 195        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
 196                NEW_AUX_ENT(AT_EXECFD, (elf_addr_t) bprm->interp_data);
 197        }
 198#undef NEW_AUX_ENT
 199        /* AT_NULL is zero; clear the rest too */
 200        memset(&elf_info[ei_index], 0,
 201               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
 202
 203        /* And advance past the AT_NULL entry.  */
 204        ei_index += 2;
 205
 206        sp = STACK_ADD(p, ei_index);
 207
 208        items = (argc + 1) + (envc + 1);
 209        if (interp_aout) {
 210                items += 3; /* a.out interpreters require argv & envp too */
 211        } else {
 212                items += 1; /* ELF interpreters only put argc on the stack */
 213        }
 214        bprm->p = STACK_ROUND(sp, items);
 215
 216        /* Point sp at the lowest address on the stack */
 217#ifdef CONFIG_STACK_GROWSUP
 218        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
 219        bprm->exec = (unsigned long) sp; /* XXX: PARISC HACK */
 220#else
 221        sp = (elf_addr_t __user *)bprm->p;
 222#endif
 223
 224        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
 225        __put_user(argc, sp++);
 226        if (interp_aout) {
 227                argv = sp + 2;
 228                envp = argv + argc + 1;
 229                __put_user((elf_addr_t)(unsigned long)argv, sp++);
 230                __put_user((elf_addr_t)(unsigned long)envp, sp++);
 231        } else {
 232                argv = sp;
 233                envp = argv + argc + 1;
 234        }
 235
 236        /* Populate argv and envp */
 237        p = current->mm->arg_end = current->mm->arg_start;
 238        while (argc-- > 0) {
 239                size_t len;
 240                __put_user((elf_addr_t)p, argv++);
 241                len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
 242                if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
 243                        return;
 244                p += len;
 245        }
 246        __put_user(0, argv);
 247        current->mm->arg_end = current->mm->env_start = p;
 248        while (envc-- > 0) {
 249                size_t len;
 250                __put_user((elf_addr_t)p, envp++);
 251                len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
 252                if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
 253                        return;
 254                p += len;
 255        }
 256        __put_user(0, envp);
 257        current->mm->env_end = p;
 258
 259        /* Put the elf_info on the stack in the right place.  */
 260        sp = (elf_addr_t __user *)envp + 1;
 261        copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t));
 262}
 263
 264#ifndef elf_map
 265
 266static unsigned long elf_map(struct file *filep, unsigned long addr,
 267                             struct elf_phdr *eppnt, int prot, int type,
 268                             unsigned long total_size)
 269{
 270        unsigned long map_addr;
 271        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
 272        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
 273
 274        addr = ELF_PAGESTART(addr);
 275        size = ELF_PAGEALIGN(size);
 276
 277        down_write(&current->mm->mmap_sem);
 278
 279        /*
 280         * total_size is the size of the ELF (interpreter) image.
 281         * The _first_ mmap needs to know the full size, otherwise
 282         * randomization might put this image into an overlapping
 283         * position with the ELF binary image. (since size < total_size)
 284         * So we first map the 'big' image - and unmap the remainder at
 285         * the end. (which unmap is needed for ELF images with holes.)
 286         */
 287        if (total_size) {
 288                total_size = ELF_PAGEALIGN(total_size);
 289                map_addr = do_mmap(filep, addr, total_size, prot, type, off);
 290                if (!BAD_ADDR(map_addr))
 291                        do_munmap(current->mm, map_addr+size, total_size-size);
 292        } else
 293                map_addr = do_mmap(filep, addr, size, prot, type, off);
 294                
 295        up_write(&current->mm->mmap_sem);
 296
 297        return map_addr;
 298}
 299
 300#endif /* !elf_map */
 301
 302static inline unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
 303{
 304        int i, first_idx = -1, last_idx = -1;
 305
 306        for (i = 0; i < nr; i++)
 307                if (cmds[i].p_type == PT_LOAD) {
 308                        last_idx = i;
 309                        if (first_idx == -1)
 310                                first_idx = i;
 311                }
 312
 313        if (first_idx == -1)
 314                return 0;
 315
 316        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
 317                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
 318}
 319
 320/* This is much more generalized than the library routine read function,
 321   so we keep this separate.  Technically the library read function
 322   is only provided so that we can read a.out libraries that have
 323   an ELF header */
 324
 325static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex,
 326                                     struct file * interpreter,
 327                                     unsigned long *interp_load_addr,
 328                                     unsigned long no_base)
 329{
 330        struct elf_phdr *elf_phdata;
 331        struct elf_phdr *eppnt;
 332        unsigned long load_addr = 0;
 333        int load_addr_set = 0;
 334        unsigned long last_bss = 0, elf_bss = 0;
 335        unsigned long error = ~0UL;
 336        unsigned long total_size;
 337        int retval, i, size;
 338
 339        /* First of all, some simple consistency checks */
 340        if (interp_elf_ex->e_type != ET_EXEC &&
 341            interp_elf_ex->e_type != ET_DYN)
 342                goto out;
 343        if (!elf_check_arch(interp_elf_ex))
 344                goto out;
 345        if (!interpreter->f_op || !interpreter->f_op->mmap)
 346                goto out;
 347
 348        /*
 349         * If the size of this structure has changed, then punt, since
 350         * we will be doing the wrong thing.
 351         */
 352        if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
 353                goto out;
 354        if (interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
 355                goto out;
 356
 357        /* Now read in all of the header information */
 358
 359        size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
 360        if (size > ELF_MIN_ALIGN)
 361                goto out;
 362        elf_phdata = (struct elf_phdr *) kmalloc(size, GFP_KERNEL);
 363        if (!elf_phdata)
 364                goto out;
 365
 366        retval = kernel_read(interpreter,interp_elf_ex->e_phoff,(char *)elf_phdata,size);
 367        error = -EIO;
 368        if (retval != size) {
 369                if (retval < 0)
 370                        error = retval; 
 371                goto out_close;
 372        }
 373
 374        total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
 375        if (!total_size)
 376                goto out_close;
 377
 378        eppnt = elf_phdata;
 379        for (i=0; i<interp_elf_ex->e_phnum; i++, eppnt++) {
 380          if (eppnt->p_type == PT_LOAD) {
 381            int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
 382            int elf_prot = 0;
 383            unsigned long vaddr = 0;
 384            unsigned long k, map_addr;
 385
 386            if (eppnt->p_flags & PF_R) elf_prot =  PROT_READ;
 387            if (eppnt->p_flags & PF_W) elf_prot |= PROT_WRITE;
 388            if (eppnt->p_flags & PF_X) elf_prot |= PROT_EXEC;
 389            vaddr = eppnt->p_vaddr;
 390            if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
 391                elf_type |= MAP_FIXED;
 392            else if (no_base && interp_elf_ex->e_type == ET_DYN)
 393                load_addr = -vaddr;
 394
 395            map_addr = elf_map(interpreter, load_addr + vaddr, eppnt, elf_prot, elf_type, total_size);
 396            total_size = 0;
 397            error = map_addr;
 398            if (BAD_ADDR(map_addr))
 399                goto out_close;
 400
 401            if (!load_addr_set && interp_elf_ex->e_type == ET_DYN) {
 402                load_addr = map_addr - ELF_PAGESTART(vaddr);
 403                load_addr_set = 1;
 404            }
 405
 406            /*
 407             * Check to see if the section's size will overflow the
 408             * allowed task size. Note that p_filesz must always be
 409             * <= p_memsize so it is only necessary to check p_memsz.
 410             */
 411            k = load_addr + eppnt->p_vaddr;
 412            if (BAD_ADDR(k) || eppnt->p_filesz > eppnt->p_memsz ||
 413                eppnt->p_memsz > TASK_SIZE || TASK_SIZE - eppnt->p_memsz < k) {
 414                error = -ENOMEM;
 415                goto out_close;
 416            }
 417
 418            /*
 419             * Find the end of the file mapping for this phdr, and keep
 420             * track of the largest address we see for this.
 421             */
 422            k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
 423            if (k > elf_bss)
 424                elf_bss = k;
 425
 426            /*
 427             * Do the same thing for the memory mapping - between
 428             * elf_bss and last_bss is the bss section.
 429             */
 430            k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
 431            if (k > last_bss)
 432                last_bss = k;
 433          }
 434        }
 435
 436        /*
 437         * Now fill out the bss section.  First pad the last page up
 438         * to the page boundary, and then perform a mmap to make sure
 439         * that there are zero-mapped pages up to and including the 
 440         * last bss page.
 441         */
 442        padzero(elf_bss);
 443        elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);   /* What we have mapped so far */
 444
 445        /* Map the last of the bss segment */
 446        if (last_bss > elf_bss) {
 447                error = do_brk_locked(elf_bss, last_bss - elf_bss);
 448                if (BAD_ADDR(error))
 449                        goto out_close;
 450        }
 451
 452        *interp_load_addr = load_addr;
 453        error = ((unsigned long) interp_elf_ex->e_entry) + load_addr;
 454
 455out_close:
 456        kfree(elf_phdata);
 457out:
 458        return error;
 459}
 460
 461static unsigned long load_aout_interp(struct exec * interp_ex,
 462                             struct file * interpreter)
 463{
 464        unsigned long text_data, elf_entry = ~0UL;
 465        char __user * addr;
 466        loff_t offset;
 467
 468        current->mm->end_code = interp_ex->a_text;
 469        text_data = interp_ex->a_text + interp_ex->a_data;
 470        current->mm->end_data = text_data;
 471        current->mm->brk = interp_ex->a_bss + text_data;
 472
 473        switch (N_MAGIC(*interp_ex)) {
 474        case OMAGIC:
 475                offset = 32;
 476                addr = (char __user *)0;
 477                break;
 478        case ZMAGIC:
 479        case QMAGIC:
 480                offset = N_TXTOFF(*interp_ex);
 481                addr = (char __user *) N_TXTADDR(*interp_ex);
 482                break;
 483        default:
 484                goto out;
 485        }
 486
 487        do_brk_locked(0, text_data);
 488        if (!interpreter->f_op || !interpreter->f_op->read)
 489                goto out;
 490        if (interpreter->f_op->read(interpreter, addr, text_data, &offset) < 0)
 491                goto out;
 492        flush_icache_range((unsigned long)addr,
 493                           (unsigned long)addr + text_data);
 494
 495        do_brk_locked(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1),
 496                interp_ex->a_bss);
 497        elf_entry = interp_ex->a_entry;
 498
 499out:
 500        return elf_entry;
 501}
 502
 503/*
 504 * These are the functions used to load ELF style executables and shared
 505 * libraries.  There is no binary dependent code anywhere else.
 506 */
 507
 /*
  * Interpreter format flags.  AOUT and ELF are distinct bits because
  * load_elf_binary() briefly holds both set while it works out which
  * format the interpreter really is.
  */
 #define INTERPRETER_NONE 0
 #define INTERPRETER_AOUT (1 << 0)
 #define INTERPRETER_ELF  (1 << 1)
 511
 512
 513static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 514{
 515        struct file *interpreter = NULL; /* to shut gcc up */
 516        unsigned long load_addr = 0, load_bias = 0;
 517        int load_addr_set = 0;
 518        char * elf_interpreter = NULL;
 519        unsigned int interpreter_type = INTERPRETER_NONE;
 520        unsigned char ibcs2_interpreter = 0;
 521        unsigned long error;
 522        struct elf_phdr * elf_ppnt, *elf_phdata;
 523        unsigned long elf_bss, elf_brk;
 524        int elf_exec_fileno;
 525        int retval, i;
 526        unsigned int size;
 527        unsigned long elf_entry, interp_load_addr = 0;
 528        unsigned long start_code, end_code, start_data, end_data;
 529        unsigned long reloc_func_desc = 0;
 530        char passed_fileno[6];
 531        struct files_struct *files;
 532        int have_pt_gnu_stack, executable_stack, relocexec, old_relocexec = current->flags & PF_RELOCEXEC;
 533        unsigned long def_flags = 0;
 534        struct {
 535                struct elfhdr elf_ex;
 536                struct elfhdr interp_elf_ex;
 537                struct exec interp_ex;
 538        } *loc;
 539
 540        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
 541        if (!loc) {
 542                retval = -ENOMEM;
 543                goto out_ret;
 544        }
 545        
 546        /* Get the exec-header */
 547        loc->elf_ex = *((struct elfhdr *) bprm->buf);
 548
 549        retval = -ENOEXEC;
 550        /* First of all, some simple consistency checks */
 551        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
 552                goto out;
 553
 554        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
 555                goto out;
 556        if (!elf_check_arch(&loc->elf_ex))
 557                goto out;
 558        if (!bprm->file->f_op||!bprm->file->f_op->mmap)
 559                goto out;
 560
 561        /* Now read in all of the header information */
 562
 563        retval = -ENOMEM;
 564        if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
 565                goto out;
 566        if (loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
 567                goto out;
 568        size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
 569        elf_phdata = (struct elf_phdr *) kmalloc(size, GFP_KERNEL);
 570        if (!elf_phdata)
 571                goto out;
 572
 573        retval = kernel_read(bprm->file, loc->elf_ex.e_phoff, (char *) elf_phdata, size);
 574        if (retval != size) {
 575                if (retval >= 0)
 576                        retval = -EIO;
 577                goto out_free_ph;
 578        }
 579
 580        files = current->files;         /* Refcounted so ok */
 581        retval = unshare_files();
 582        if (retval < 0)
 583                goto out_free_ph;
 584        if (files == current->files) {
 585                put_files_struct(files);
 586                files = NULL;
 587        }
 588
 589        /* exec will make our files private anyway, but for the a.out
 590           loader stuff we need to do it earlier */
 591
 592        retval = get_unused_fd();
 593        if (retval < 0)
 594                goto out_free_fh;
 595        get_file(bprm->file);
 596        fd_install(elf_exec_fileno = retval, bprm->file);
 597
 598        elf_ppnt = elf_phdata;
 599        elf_bss = 0;
 600        elf_brk = 0;
 601
 602        start_code = ~0UL;
 603        end_code = 0;
 604        start_data = 0;
 605        end_data = 0;
 606
 607        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
 608                if (elf_ppnt->p_type == PT_INTERP) {
 609                        /* This is the program interpreter used for
 610                         * shared libraries - for now assume that this
 611                         * is an a.out format binary
 612                         */
 613
 614                        retval = -ENOMEM;
 615                        if (elf_ppnt->p_filesz > PATH_MAX || 
 616                            elf_ppnt->p_filesz == 0)
 617                                goto out_free_file;
 618                        elf_interpreter = (char *) kmalloc(elf_ppnt->p_filesz,
 619                                                           GFP_KERNEL);
 620                        if (!elf_interpreter)
 621                                goto out_free_file;
 622
 623                        retval = kernel_read(bprm->file, elf_ppnt->p_offset,
 624                                           elf_interpreter,
 625                                           elf_ppnt->p_filesz);
 626                        if (retval != elf_ppnt->p_filesz) {
 627                                if (retval >= 0)
 628                                        retval = -EIO;
 629                                goto out_free_interp;
 630                        }
 631                        /* make sure path is NULL terminated */
 632                        retval = -EINVAL;
 633                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
 634                                goto out_free_interp;
 635
 636                        /* If the program interpreter is one of these two,
 637                         * then assume an iBCS2 image. Otherwise assume
 638                         * a native linux image.
 639                         */
 640                        if (strcmp(elf_interpreter,"/usr/lib/libc.so.1") == 0 ||
 641                            strcmp(elf_interpreter,"/usr/lib/ld.so.1") == 0)
 642                                ibcs2_interpreter = 1;
 643
 644                        /*
 645                         * The early SET_PERSONALITY here is so that the lookup
 646                         * for the interpreter happens in the namespace of the 
 647                         * to-be-execed image.  SET_PERSONALITY can select an
 648                         * alternate root.
 649                         *
 650                         * However, SET_PERSONALITY is NOT allowed to switch
 651                         * this task into the new images's memory mapping
 652                         * policy - that is, TASK_SIZE must still evaluate to
 653                         * that which is appropriate to the execing application.
 654                         * This is because exit_mmap() needs to have TASK_SIZE
 655                         * evaluate to the size of the old image.
 656                         *
 657                         * So if (say) a 64-bit application is execing a 32-bit
 658                         * application it is the architecture's responsibility
 659                         * to defer changing the value of TASK_SIZE until the
 660                         * switch really is going to happen - do this in
 661                         * flush_thread().      - akpm
 662                         */
 663                        SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
 664
 665                        interpreter = open_exec(elf_interpreter);
 666                        retval = PTR_ERR(interpreter);
 667                        if (IS_ERR(interpreter))
 668                                goto out_free_interp;
 669
 670                        /*
 671                         * If the binary is not readable then enforce
 672                         * mm->dumpable = 0 regardless of the interpreter's
 673                         * permissions.
 674                         */
 675                        if (permission(interpreter->f_dentry->d_inode, MAY_READ, NULL) < 0)
 676                                bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
 677
 678                        retval = kernel_read(interpreter, 0, bprm->buf, BINPRM_BUF_SIZE);
 679                        if (retval != BINPRM_BUF_SIZE) {
 680                                if (retval >= 0)
 681                                        retval = -EIO;
 682                                goto out_free_dentry;
 683                        }
 684
 685                        /* Get the exec headers */
 686                        loc->interp_ex = *((struct exec *) bprm->buf);
 687                        loc->interp_elf_ex = *((struct elfhdr *) bprm->buf);
 688                        break;
 689                }
 690                elf_ppnt++;
 691        }
 692
 693        elf_ppnt = elf_phdata;
 694        executable_stack = EXSTACK_DEFAULT;
 695
 696        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
 697                if (elf_ppnt->p_type == PT_GNU_STACK) {
 698                        if (elf_ppnt->p_flags & PF_X)
 699                                executable_stack = EXSTACK_ENABLE_X;
 700                        else
 701                                executable_stack = EXSTACK_DISABLE_X;
 702                        break;
 703                }
 704        have_pt_gnu_stack = (i < loc->elf_ex.e_phnum);
 705
 706        relocexec = 0;
 707
 708        if (current->personality == PER_LINUX ||
 709            bprm->e_uid != current->euid ||
 710            bprm->e_gid != current->egid)
 711        switch (exec_shield) {
 712        case 1:
 713                if (executable_stack == EXSTACK_DISABLE_X) {
 714                        current->flags |= PF_RELOCEXEC;
 715                        relocexec = PF_RELOCEXEC;
 716                }
 717                break;
 718
 719        case 2:
 720                executable_stack = EXSTACK_DISABLE_X;
 721                current->flags |= PF_RELOCEXEC;
 722                relocexec = PF_RELOCEXEC;
 723                break;
 724        }
 725
 726        /* Some simple consistency checks for the interpreter */
 727        if (elf_interpreter) {
 728                interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;
 729
 730                /* Now figure out which format our binary is */
 731                if ((N_MAGIC(loc->interp_ex) != OMAGIC) &&
 732                    (N_MAGIC(loc->interp_ex) != ZMAGIC) &&
 733                    (N_MAGIC(loc->interp_ex) != QMAGIC))
 734                        interpreter_type = INTERPRETER_ELF;
 735
 736                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
 737                        interpreter_type &= ~INTERPRETER_ELF;
 738
 739                retval = -ELIBBAD;
 740                if (!interpreter_type)
 741                        goto out_free_dentry;
 742
 743                /* Make sure only one type was selected */
 744                if ((interpreter_type & INTERPRETER_ELF) &&
 745                     interpreter_type != INTERPRETER_ELF) {
 746                        // FIXME - ratelimit this before re-enabling
 747                        // printk(KERN_WARNING "ELF: Ambiguous type, using ELF\n");
 748                        interpreter_type = INTERPRETER_ELF;
 749                }
 750                /* Verify the interpreter has a valid arch */
 751                if ((interpreter_type == INTERPRETER_ELF) &&
 752                    !elf_check_arch(&loc->interp_elf_ex))
 753                        goto out_free_dentry;
 754        } else {
 755                /* Executables without an interpreter also need a personality  */
 756                SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
 757        }
 758
 759        /* OK, we are done with that, now set up the arg stuff,
 760           and then start this sucker up */
 761
 762        if ((!bprm->sh_bang) && (interpreter_type == INTERPRETER_AOUT)) {
 763                char *passed_p = passed_fileno;
 764                sprintf(passed_fileno, "%d", elf_exec_fileno);
 765
 766                if (elf_interpreter) {
 767                        retval = copy_strings_kernel(1, &passed_p, bprm);
 768                        if (retval)
 769                                goto out_free_dentry; 
 770                        bprm->argc++;
 771                }
 772        }
 773
 774        /* Flush all traces of the currently running executable */
 775        retval = flush_old_exec(bprm);
 776        if (retval)
 777                goto out_free_dentry;
 778        current->flags |= relocexec;
 779
 780#ifdef __i386__
 781        /*
 782         * Turn off the CS limit completely if exec-shield disabled or
 783         * NX active:
 784         */
 785        if (!exec_shield || executable_stack != EXSTACK_DISABLE_X || nx_enabled)
 786                arch_add_exec_range(current->mm, -1);
 787#endif
 788
 789        /* Discard our unneeded old files struct */
 790        if (files) {
 791                steal_locks(files);
 792                put_files_struct(files);
 793                files = NULL;
 794        }
 795
 796        /* OK, This is the point of no return */
 797        current->mm->start_data = 0;
 798        current->mm->end_data = 0;
 799        current->mm->end_code = 0;
 800        current->mm->mmap = NULL;
 801        current->flags &= ~PF_FORKNOEXEC;
 802        current->mm->def_flags = def_flags;
 803
 804        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
 805           may depend on the personality.  */
 806        SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
 807        if (exec_shield != 2 &&
 808                        elf_read_implies_exec(loc->elf_ex, have_pt_gnu_stack))
 809                current->personality |= READ_IMPLIES_EXEC;
 810
 811        arch_pick_mmap_layout(current->mm);
 812
 813        /* Do this so that we can load the interpreter, if need be.  We will
 814           change some of these later */
 815        current->mm->rss = 0;
 816        current->mm->free_area_cache = current->mm->mmap_base;
 817        retval = setup_arg_pages(bprm, executable_stack);
 818        if (retval < 0) {
 819                send_sig(SIGKILL, current, 0);
 820                goto out_free_dentry;
 821        }
 822        
 823        current->mm->start_stack = bprm->p;
 824
 825
 826        /* Now we do a little grungy work by mmaping the ELF image into
 827           the correct location in memory.
 828         */
 829
 830        for(i = 0, elf_ppnt = elf_phdata; i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
 831                int elf_prot = 0, elf_flags;
 832                unsigned long k, vaddr;
 833
 834                if (elf_ppnt->p_type != PT_LOAD)
 835                        continue;
 836
 837                if (unlikely (elf_brk > elf_bss)) {
 838                        unsigned long nbyte;
 839                    
 840                        /* There was a PT_LOAD segment with p_memsz > p_filesz
 841                           before this one. Map anonymous pages, if needed,
 842                           and clear the area.  */
 843                        retval = set_brk (elf_bss + load_bias,
 844                                          elf_brk + load_bias);
 845                        if (retval) {
 846                                send_sig(SIGKILL, current, 0);
 847                                goto out_free_dentry;
 848                        }
 849                        nbyte = ELF_PAGEOFFSET(elf_bss);
 850                        if (nbyte) {
 851                                nbyte = ELF_MIN_ALIGN - nbyte;
 852                                if (nbyte > elf_brk - elf_bss)
 853                                        nbyte = elf_brk - elf_bss;
 854                                clear_user((void __user *) elf_bss + load_bias, nbyte);
 855                        }
 856                }
 857
 858                if (elf_ppnt->p_flags & PF_R) elf_prot |= PROT_READ;
 859                if (elf_ppnt->p_flags & PF_W) elf_prot |= PROT_WRITE;
 860                if (elf_ppnt->p_flags & PF_X) elf_prot |= PROT_EXEC;
 861
 862                elf_flags = MAP_PRIVATE|MAP_DENYWRITE|MAP_EXECUTABLE;
 863
 864                vaddr = elf_ppnt->p_vaddr;
 865                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set)
 866                        elf_flags |= MAP_FIXED;
 867                else if (loc->elf_ex.e_type == ET_DYN)
 868#ifdef __i386__
 869                        load_bias = 0;
 870#else
 871                        load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
 872#endif
 873
 874                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, elf_prot, elf_flags, 0);
 875                if (BAD_ADDR(error)) {
 876                        send_sig(SIGKILL, current, 0);
 877                        goto out_free_dentry;
 878                }
 879
 880                if (!load_addr_set) {
 881                        load_addr_set = 1;
 882                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
 883                        if (loc->elf_ex.e_type == ET_DYN) {
 884                                load_bias += error -
 885                                             ELF_PAGESTART(load_bias + vaddr);
 886                                load_addr += load_bias;
 887                                reloc_func_desc = load_bias;
 888                        }
 889                }
 890                k = elf_ppnt->p_vaddr;
 891                if (k < start_code) start_code = k;
 892                if (start_data < k) start_data = k;
 893
 894                /*
 895                 * Check to see if the section's size will overflow the
 896                 * allowed task size. Note that p_filesz must always be
 897                 * <= p_memsz so it is only necessary to check p_memsz.
 898                 */
 899                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
 900                    elf_ppnt->p_memsz > TASK_SIZE ||
 901                    TASK_SIZE - elf_ppnt->p_memsz < k) {
 902                        /* set_brk can never work.  Avoid overflows.  */
 903                        send_sig(SIGKILL, current, 0);
 904                        goto out_free_dentry;
 905                }
 906
 907                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
 908
 909                if (k > elf_bss)
 910                        elf_bss = k;
 911                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
 912                        end_code = k;
 913                if (end_data < k)
 914                        end_data = k;
 915                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
 916                if (k > elf_brk)
 917                        elf_brk = k;
 918        }
 919
 920        loc->elf_ex.e_entry += load_bias;
 921        elf_bss += load_bias;
 922        elf_brk += load_bias;
 923        start_code += load_bias;
 924        end_code += load_bias;
 925        start_data += load_bias;
 926        end_data += load_bias;
 927
 928        /* Calling set_brk effectively mmaps the pages that we need
 929         * for the bss and break sections.  We must do this before
 930         * mapping in the interpreter, to make sure it doesn't wind
 931         * up getting placed where the bss needs to go.
 932         */
 933        retval = set_brk(elf_bss, elf_brk);
 934        if (retval) {
 935                send_sig(SIGKILL, current, 0);
 936                goto out_free_dentry;
 937        }
 938        padzero(elf_bss);
 939
 940        if (elf_interpreter) {
 941                if (interpreter_type == INTERPRETER_AOUT)
 942                        elf_entry = load_aout_interp(&loc->interp_ex,
 943                                                     interpreter);
 944                else
 945                        elf_entry = load_elf_interp(&loc->interp_elf_ex,
 946                                                    interpreter,
 947                                                    &interp_load_addr,
 948                                                    load_bias);
 949                if (BAD_ADDR(elf_entry)) {
 950                        force_sig(SIGSEGV, current);
 951                        retval = IS_ERR((void *)elf_entry) ?
 952                                        (int)elf_entry : -EINVAL;
 953                        goto out_free_dentry;
 954                }
 955                reloc_func_desc = interp_load_addr;
 956
 957                allow_write_access(interpreter);
 958                fput(interpreter);
 959                kfree(elf_interpreter);
 960        } else {
 961                elf_entry = loc->elf_ex.e_entry;
 962                if (BAD_ADDR(elf_entry)) {
 963                        force_sig(SIGSEGV, current);
 964                        retval = -EINVAL;
 965                        goto out_free_dentry;
 966                }
 967        }
 968
 969        kfree(elf_phdata);
 970
 971        if (interpreter_type != INTERPRETER_AOUT)
 972                sys_close(elf_exec_fileno);
 973
 974        set_binfmt(&elf_format);
 975
 976        /*
 977         * Map the vsyscall trampoline. This address is then passed via
 978         * AT_SYSINFO.
 979         */
 980#ifdef __HAVE_ARCH_VSYSCALL
 981        map_vsyscall();
 982#endif
 983#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
 984        retval = arch_setup_additional_pages(bprm, executable_stack);
 985        if (retval < 0) {
 986                send_sig(SIGKILL, current, 0);
 987                goto out;
 988        }
 989#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
 990
 991        compute_creds(bprm);
 992        current->flags &= ~PF_FORKNOEXEC;
 993        create_elf_tables(bprm, &loc->elf_ex, (interpreter_type == INTERPRETER_AOUT),
 994                        load_addr, interp_load_addr);
 995        /* N.B. passed_fileno might not be initialized? */
 996        if (interpreter_type == INTERPRETER_AOUT)
 997                current->mm->arg_start += strlen(passed_fileno) + 1;
 998        current->mm->end_code = end_code;
 999        current->mm->start_code = start_code;
1000        current->mm->start_data = start_data;
1001        current->mm->end_data = end_data;
1002        current->mm->start_stack = bprm->p;
1003
1004#ifdef __HAVE_ARCH_RANDOMIZE_BRK
1005        if (current->flags & PF_RELOCEXEC)
1006                randomize_brk(elf_brk);
1007#endif
1008        if (current->personality & MMAP_PAGE_ZERO) {
1009                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
1010                   and some applications "depend" upon this behavior.
1011                   Since we do not have the power to recompile these, we
1012                   emulate the SVr4 behavior.  Sigh.  */
1013                down_write(&current->mm->mmap_sem);
1014                error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1015                                MAP_FIXED | MAP_PRIVATE, 0);
1016                up_write(&current->mm->mmap_sem);
1017        }
1018
1019#ifdef ELF_PLAT_INIT
1020        /*
1021         * The ABI may specify that certain registers be set up in special
1022         * ways (on i386 %edx is the address of a DT_FINI function, for
1023         * example).  In addition, it may also specify (eg, PowerPC64 ELF)
1024         * that the e_entry field is the address of the function descriptor
1025         * for the startup routine, rather than the address of the startup
1026         * routine itself.  This macro performs whatever initialization to
1027         * the regs structure is required as well as any relocations to the
1028         * function descriptor entries when executing dynamically linked apps.
1029         */
1030        ELF_PLAT_INIT(regs, reloc_func_desc);
1031#endif
1032
1033        start_thread(regs, elf_entry, bprm->p);
1034        if (unlikely(current->ptrace & PT_PTRACED)) {
1035                if (current->ptrace & PT_TRACE_EXEC)
1036                        ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
1037                else
1038                        send_sig(SIGTRAP, current, 0);
1039        }
1040        retval = 0;
1041out:
1042        kfree(loc);
1043out_ret:
1044        return retval;
1045
1046        /* error cleanup */
1047out_free_dentry:
1048        allow_write_access(interpreter);
1049        if (interpreter)
1050                fput(interpreter);
1051out_free_interp:
1052        if (elf_interpreter)
1053                kfree(elf_interpreter);
1054out_free_file:
1055        sys_close(elf_exec_fileno);
1056out_free_fh:
1057        if (files)
1058                reset_files_struct(current, files);
1059out_free_ph:
1060        kfree(elf_phdata);
1061        current->flags &= ~PF_RELOCEXEC;
1062        current->flags |= old_relocexec;
1063        goto out;
1064}
1065
1066/* This is really simpleminded and specialized - we are loading an
1067   a.out library that is given an ELF header. */
1068
1069static int load_elf_library(struct file *file)
1070{
1071        struct elf_phdr *elf_phdata;
1072        struct elf_phdr *eppnt;
1073        unsigned long elf_bss, bss, len;
1074        int retval, error, i, j;
1075        struct elfhdr elf_ex;
1076
1077        error = -ENOEXEC;
1078        retval = kernel_read(file, 0, (char *) &elf_ex, sizeof(elf_ex));
1079        if (retval != sizeof(elf_ex))
1080                goto out;
1081
1082        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1083                goto out;
1084
1085        /* First of all, some simple consistency checks */
1086        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1087           !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1088                goto out;
1089
1090        /* Now read in all of the header information */
1091
1092        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1093        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1094
1095        error = -ENOMEM;
1096        elf_phdata = kmalloc(j, GFP_KERNEL);
1097        if (!elf_phdata)
1098                goto out;
1099
1100        eppnt = elf_phdata;
1101        error = -ENOEXEC;
1102        retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1103        if (retval != j)
1104                goto out_free_ph;
1105
1106        for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1107                if ((eppnt + i)->p_type == PT_LOAD)
1108                        j++;
1109        if (j != 1)
1110                goto out_free_ph;
1111
1112        while (eppnt->p_type != PT_LOAD)
1113                eppnt++;
1114
1115        /* Now use mmap to map the library into memory. */
1116        down_write(&current->mm->mmap_sem);
1117        error = do_mmap(file,
1118                        ELF_PAGESTART(eppnt->p_vaddr),
1119                        (eppnt->p_filesz +
1120                          ELF_PAGEOFFSET(eppnt->p_vaddr)),
1121                        PROT_READ | PROT_WRITE | PROT_EXEC,
1122                        MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1123                        (eppnt->p_offset -
1124                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
1125        up_write(&current->mm->mmap_sem);
1126        if (error != ELF_PAGESTART(eppnt->p_vaddr))
1127                goto out_free_ph;
1128
1129        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1130        padzero(elf_bss);
1131
1132        len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr + ELF_MIN_ALIGN - 1);
1133        bss = eppnt->p_memsz + eppnt->p_vaddr;
1134        if (bss > len)
1135                do_brk_locked(len, bss - len);
1136        error = 0;
1137
1138out_free_ph:
1139        kfree(elf_phdata);
1140out:
1141        return error;
1142}
1143
1144/*
1145 * Note that some platforms still use traditional core dumps and not
1146 * the ELF core dump.  Each platform can select it as appropriate.
1147 */
1148#ifdef USE_ELF_CORE_DUMP
1149
1150/*
1151 * ELF core dumper
1152 *
1153 * Modelled on fs/exec.c:aout_core_dump()
1154 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1155 */
1156/*
1157 * These are the only things you should do on a core-file: use only these
1158 * functions to write out all the necessary info.
1159 */
1160static int dump_write(struct file *file, const void *addr, int nr)
1161{
1162        return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
1163}
1164
1165static int dump_seek(struct file *file, loff_t off)
1166{
1167        if (file->f_op->llseek) {
1168                if (file->f_op->llseek(file, off, 0) != off)
1169                        return 0;
1170        } else
1171                file->f_pos = off;
1172        return 1;
1173}
1174
1175/*
1176 * Decide what to dump of a segment, part, all or none.
1177 */
1178static unsigned long vma_dump_size(struct vm_area_struct *vma,
1179                                   unsigned long mm_flags)
1180{
1181        /* Do not dump I/O mapped devices or special mappings */
1182        if (vma->vm_flags & (VM_IO | VM_RESERVED))
1183                return 0;
1184
1185#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))
1186
1187        /* By default, dump shared memory if mapped from an anonymous file. */
1188        if (vma->vm_flags & VM_SHARED) {
1189                if (vma->vm_file->f_dentry->d_inode->i_nlink == 0 ?
1190                    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1191                        goto whole;
1192                return 0;
1193        }
1194
1195        /* Dump segments that have been written to.  */
1196        if (vma->anon_vma && FILTER(ANON_PRIVATE))
1197                goto whole;
1198        if (vma->vm_file == NULL)
1199                return 0;
1200
1201        if (FILTER(MAPPED_PRIVATE))
1202                goto whole;
1203
1204        /*
1205         * If this looks like the beginning of a DSO or executable mapping,
1206         * check for an ELF header.  If we find one, dump the first page to
1207         * aid in determining what was mapped here.
1208         */
1209        if (FILTER(ELF_HEADERS) && vma->vm_file != NULL && vma->vm_pgoff == 0) {
1210                u32 __user *header = (u32 __user *) vma->vm_start;
1211                u32 word;
1212                /*
1213                 * Doing it this way gets the constant folded by GCC.
1214                 */
1215                union {
1216                        u32 cmp;
1217                        char elfmag[SELFMAG];
1218                } magic;
1219                BUILD_BUG_ON(SELFMAG != sizeof word);
1220                magic.elfmag[EI_MAG0] = ELFMAG0;
1221                magic.elfmag[EI_MAG1] = ELFMAG1;
1222                magic.elfmag[EI_MAG2] = ELFMAG2;
1223                magic.elfmag[EI_MAG3] = ELFMAG3;
1224                if (get_user(word, header) == 0 && word == magic.cmp)
1225                        return PAGE_SIZE;
1226        }
1227
1228#undef  FILTER
1229
1230        return 0;
1231
1232whole:
1233        return vma->vm_end - vma->vm_start;
1234}
1235
/*
 * An ELF note held in memory before it is serialized into the core file.
 * The name and data are referenced, not copied.
 */
struct memelfnote {
        const char *name;       /* NUL-terminated note name, e.g. "CORE" */
        int type;               /* note type, e.g. NT_PRSTATUS */
        unsigned int datasz;    /* size of the payload in bytes */
        void *data;             /* payload */
};
1244
1245static int notesize(struct memelfnote *en)
1246{
1247        int sz;
1248
1249        sz = sizeof(struct elf_note);
1250        sz += roundup(strlen(en->name) + 1, 4);
1251        sz += roundup(en->datasz, 4);
1252
1253        return sz;
1254}
1255
1256#define DUMP_WRITE(addr, nr)    \
1257        do { if (!dump_write(file, (addr), (nr))) return 0; } while(0)
1258#define DUMP_SEEK(off)  \
1259        do { if (!dump_seek(file, (off))) return 0; } while(0)
1260
1261static int writenote(struct memelfnote *men, struct file *file)
1262{
1263        struct elf_note en;
1264
1265        en.n_namesz = strlen(men->name) + 1;
1266        en.n_descsz = men->datasz;
1267        en.n_type = men->type;
1268
1269        DUMP_WRITE(&en, sizeof(en));
1270        DUMP_WRITE(men->name, en.n_namesz);
1271        /* XXX - cast from long long to long to avoid need for libgcc.a */
1272        DUMP_SEEK(roundup((unsigned long)file->f_pos, 4));      /* XXX */
1273        DUMP_WRITE(men->data, men->datasz);
1274        DUMP_SEEK(roundup((unsigned long)file->f_pos, 4));      /* XXX */
1275
1276        return 1;
1277}
1278#undef DUMP_WRITE
1279#undef DUMP_SEEK
1280
/*
 * Helpers for the core dumper.  Both expect the calling function to
 * provide a `file` variable and an `end_coredump` label; DUMP_WRITE
 * additionally expects `size` and `limit`.
 *
 * DUMP_WRITE adds nr to the running size and bails out to end_coredump
 * if that exceeds limit or if the write fails.  DUMP_SEEK bails out if
 * the seek fails.
 *
 * Each macro is wrapped in do { } while (0) so that it expands to a
 * single statement and stays correct under an unbraced if/else.
 */
#define DUMP_WRITE(addr, nr)    \
        do { \
                if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
                        goto end_coredump; \
        } while (0)
#define DUMP_SEEK(off)  \
        do { \
                if (!dump_seek(file, (off))) \
                        goto end_coredump; \
        } while (0)
1287
1288static inline void fill_elf_header(struct elfhdr *elf, int segs)
1289{
1290        memcpy(elf->e_ident, ELFMAG, SELFMAG);
1291        elf->e_ident[EI_CLASS] = ELF_CLASS;
1292        elf->e_ident[EI_DATA] = ELF_DATA;
1293        elf->e_ident[EI_VERSION] = EV_CURRENT;
1294        elf->e_ident[EI_OSABI] = ELF_OSABI;
1295        memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
1296
1297        elf->e_type = ET_CORE;
1298        elf->e_machine = ELF_ARCH;
1299        elf->e_version = EV_CURRENT;
1300        elf->e_entry = 0;
1301        elf->e_phoff = sizeof(struct elfhdr);
1302        elf->e_shoff = 0;
1303        elf->e_flags = 0;
1304        elf->e_ehsize = sizeof(struct elfhdr);
1305        elf->e_phentsize = sizeof(struct elf_phdr);
1306        elf->e_phnum = segs;
1307        elf->e_shentsize = 0;
1308        elf->e_shnum = 0;
1309        elf->e_shstrndx = 0;
1310        return;
1311}
1312
1313static inline void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1314{
1315        phdr->p_type = PT_NOTE;
1316        phdr->p_offset = offset;
1317        phdr->p_vaddr = 0;
1318        phdr->p_paddr = 0;
1319        phdr->p_filesz = sz;
1320        phdr->p_memsz = 0;
1321        phdr->p_flags = 0;
1322        phdr->p_align = 0;
1323        return;
1324}
1325
1326static void fill_note(struct memelfnote *note, const char *name, int type, 
1327                unsigned int sz, void *data)
1328{
1329        note->name = name;
1330        note->type = type;
1331        note->datasz = sz;
1332        note->data = data;
1333        return;
1334}
1335
1336/*
1337 * fill up all the fields in prstatus from the given task struct, except registers
1338 * which need to be filled up separately.
1339 */
1340static void fill_prstatus(struct elf_prstatus *prstatus,
1341                        struct task_struct *p, long signr) 
1342{
1343        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1344        prstatus->pr_sigpend = p->pending.signal.sig[0];
1345        prstatus->pr_sighold = p->blocked.sig[0];
1346        prstatus->pr_pid = p->pid;
1347        prstatus->pr_ppid = p->parent->pid;
1348        prstatus->pr_pgrp = process_group(p);
1349        prstatus->pr_sid = p->signal->session;
1350        if (p->pid == p->tgid) {
1351                /*
1352                 * This is the record for the group leader.  Add in the
1353                 * cumulative times of previous dead threads.  This total
1354                 * won't include the time of each live thread whose state
1355                 * is included in the core dump.  The final total reported
1356                 * to our parent process when it calls wait4 will include
1357                 * those sums as well as the little bit more time it takes
1358                 * this and each other thread to finish dying after the
1359                 * core dump synchronization phase.
1360                 */
1361                jiffies_to_timeval(p->utime + p->signal->utime,
1362                                   &prstatus->pr_utime);
1363                jiffies_to_timeval(p->stime + p->signal->stime,
1364                                   &prstatus->pr_stime);
1365        } else {
1366                jiffies_to_timeval(p->utime, &prstatus->pr_utime);
1367                jiffies_to_timeval(p->stime, &prstatus->pr_stime);
1368        }
1369        jiffies_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1370        jiffies_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1371}
1372
1373static void fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1374                        struct mm_struct *mm)
1375{
1376        unsigned int i, len;
1377        
1378        /* first copy the parameters from user space */
1379        memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1380
1381        len = mm->arg_end - mm->arg_start;
1382        if (len >= ELF_PRARGSZ)
1383                len = ELF_PRARGSZ-1;
1384        copy_from_user(&psinfo->pr_psargs,
1385                       (const char __user *)mm->arg_start, len);
1386        for(i = 0; i < len; i++)
1387                if (psinfo->pr_psargs[i] == 0)
1388                        psinfo->pr_psargs[i] = ' ';
1389        psinfo->pr_psargs[len] = 0;
1390
1391        psinfo->pr_pid = p->pid;
1392        psinfo->pr_ppid = p->parent->pid;
1393        psinfo->pr_pgrp = process_group(p);
1394        psinfo->pr_sid = p->signal->session;
1395
1396        i = p->state ? ffz(~p->state) + 1 : 0;
1397        psinfo->pr_state = i;
1398        psinfo->pr_sname = (i < 0 || i > 5) ? '.' : "RSDTZW"[i];
1399        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1400        psinfo->pr_nice = task_nice(p);
1401        psinfo->pr_flag = p->flags;
1402        SET_UID(psinfo->pr_uid, p->uid);
1403        SET_GID(psinfo->pr_gid, p->gid);
1404        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1405        
1406        return;
1407}
1408
1409/* Here is the structure in which status of each thread is captured. */
1410struct elf_thread_status
1411{
1412        struct list_head list;
1413        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1414        elf_fpregset_t fpu;             /* NT_PRFPREG */
1415        struct task_struct *thread;
1416#ifdef ELF_CORE_COPY_XFPREGS
1417        elf_fpxregset_t xfpu;           /* NT_PRXFPREG */
1418#endif
1419        struct memelfnote notes[3];
1420        int num_notes;
1421};
1422
1423/*
1424 * In order to add the specific thread information for the elf file format,
1425 * we need to keep a linked list of every threads pr_status and then
1426 * create a single section for them in the final core file.
1427 */
1428static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1429{
1430        int sz = 0;
1431        struct task_struct *p = t->thread;
1432        t->num_notes = 0;
1433
1434        fill_prstatus(&t->prstatus, p, signr);
1435        elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1436        
1437        fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus), &(t->prstatus));
1438        t->num_notes++;
1439        sz += notesize(&t->notes[0]);
1440
1441        if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL, &t->fpu))) {
1442                fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu), &(t->fpu));
1443                t->num_notes++;
1444                sz += notesize(&t->notes[1]);
1445        }
1446
1447#ifdef ELF_CORE_COPY_XFPREGS
1448        if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1449                fill_note(&t->notes[2], "LINUX", NT_PRXFPREG, sizeof(t->xfpu), &t->xfpu);
1450                t->num_notes++;
1451                sz += notesize(&t->notes[2]);
1452        }
1453#endif  
1454        return sz;
1455}
1456
1457/*
1458 * Actual dumper
1459 *
1460 * This is a two-pass process; first we find the offsets of the bits,
1461 * and then they are actually written out.  If we run out of core limit
1462 * we just truncate.
1463 */
1464static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file)
1465{
1466#define NUM_NOTES       6
1467        int has_dumped = 0;
1468        mm_segment_t fs;
1469        int segs;
1470        size_t size = 0;
1471        int i;
1472        struct vm_area_struct *vma;
1473        struct elfhdr *elf = NULL;
1474        loff_t offset = 0, dataoff;
1475        unsigned long limit = current->rlim[RLIMIT_CORE].rlim_cur;
1476        int numnote;
1477        struct memelfnote *notes = NULL;
1478        struct elf_prstatus *prstatus = NULL;   /* NT_PRSTATUS */
1479        struct elf_prpsinfo *psinfo = NULL;     /* NT_PRPSINFO */
1480        struct task_struct *g, *p;
1481        LIST_HEAD(thread_list);
1482        struct list_head *t;
1483        elf_fpregset_t *fpu = NULL;
1484#ifdef ELF_CORE_COPY_XFPREGS
1485        elf_fpxregset_t *xfpu = NULL;
1486#endif
1487        int thread_status_size = 0;
1488        elf_addr_t *auxv;
1489        unsigned long mm_flags;
1490
1491        /*
1492         * We no longer stop all VM operations.
1493         * 
1494         * This is because those processes that could possibly change map_count or
1495         * the mmap / vma pages are now blocked in do_exit on current finishing
1496         * this core dump.
1497         *
1498         * Only ptrace can touch these memory addresses, but it doesn't change
1499         * the map_count or the pages allocated.  So no possibility of crashing
1500         * exists while dumping the mm->vm_next areas to the core file.
1501         */
1502  
1503        /* alloc memory for large data structures: too large to be on stack */
1504        elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1505        if (!elf)
1506                goto cleanup;
1507        prstatus = kmalloc(sizeof(*prstatus), GFP_KERNEL);
1508        if (!prstatus)
1509                goto cleanup;
1510        psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1511        if (!psinfo)
1512                goto cleanup;
1513        notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), GFP_KERNEL);
1514        if (!notes)
1515                goto cleanup;
1516        fpu = kmalloc(sizeof(*fpu), GFP_KERNEL);
1517        if (!fpu)
1518                goto cleanup;
1519#ifdef ELF_CORE_COPY_XFPREGS
1520        xfpu = kmalloc(sizeof(*xfpu), GFP_KERNEL);
1521        if (!xfpu)
1522                goto cleanup;
1523#endif
1524
1525        if (signr) {
1526                struct elf_thread_status *tmp;
1527                read_lock(&tasklist_lock);
1528                do_each_thread(g,p)
1529                        if (current->mm == p->mm && current != p) {
1530                                tmp = kmalloc(sizeof(*tmp), GFP_ATOMIC);
1531                                if (!tmp) {
1532                                        read_unlock(&tasklist_lock);
1533                                        goto cleanup;
1534                                }
1535                                memset(tmp, 0, sizeof(*tmp));
1536                                INIT_LIST_HEAD(&tmp->list);
1537                                tmp->thread = p;
1538                                list_add(&tmp->list, &thread_list);
1539                        }
1540                while_each_thread(g,p);
1541                read_unlock(&tasklist_lock);
1542                list_for_each(t, &thread_list) {
1543                        struct elf_thread_status *tmp;
1544                        int sz;
1545
1546                        tmp = list_entry(t, struct elf_thread_status, list);
1547                        sz = elf_dump_thread_status(signr, tmp);
1548                        thread_status_size += sz;
1549                }
1550        }
1551        /* now collect the dump for the current */
1552        memset(prstatus, 0, sizeof(*prstatus));
1553        fill_prstatus(prstatus, current, signr);
1554        elf_core_copy_regs(&prstatus->pr_reg, regs);
1555        
1556        segs = current->mm->map_count;
1557#ifdef ELF_CORE_EXTRA_PHDRS
1558        segs += ELF_CORE_EXTRA_PHDRS;
1559#endif
1560
1561        /* Set up header */
1562        fill_elf_header(elf, segs+1);   /* including notes section */
1563
1564        has_dumped = 1;
1565        current->flags |= PF_DUMPCORE;
1566
1567        /*
1568         * Set up the notes in similar form to SVR4 core dumps made
1569         * with info from their /proc.
1570         */
1571
1572        fill_note(notes +0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus);
1573        
1574        fill_psinfo(psinfo, current->group_leader, current->mm);
1575        fill_note(notes +1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1576        
1577        fill_note(notes +2, "CORE", NT_TASKSTRUCT, sizeof(*current), current);
1578  
1579        numnote = 3;
1580
1581        auxv = (elf_addr_t *) current->mm->saved_auxv;
1582
1583        i = 0;
1584        do
1585                i += 2;
1586        while (auxv[i - 2] != AT_NULL);
1587        fill_note(&notes[numnote++], "CORE", NT_AUXV,
1588                  i * sizeof (elf_addr_t), auxv);
1589
1590        /* Try to dump the FPU. */
1591        if ((prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs, fpu)))
1592                fill_note(notes + numnote++,
1593                          "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
1594#ifdef ELF_CORE_COPY_XFPREGS
1595        if (elf_core_copy_task_xfpregs(current, xfpu))
1596                fill_note(notes + numnote++,
1597                          "LINUX", NT_PRXFPREG, sizeof(*xfpu), xfpu);
1598#endif  
1599  
1600        fs = get_fs();
1601        set_fs(KERNEL_DS);
1602
1603        DUMP_WRITE(elf, sizeof(*elf));
1604        offset += sizeof(*elf);                         /* Elf header */
1605        offset += (segs+1) * sizeof(struct elf_phdr);   /* Program headers */
1606
1607        /* Write notes phdr entry */
1608        {
1609                struct elf_phdr phdr;
1610                int sz = 0;
1611
1612                for (i = 0; i < numnote; i++)
1613                        sz += notesize(notes + i);
1614                
1615                sz += thread_status_size;
1616
1617                fill_elf_note_phdr(&phdr, sz, offset);
1618                offset += sz;
1619                DUMP_WRITE(&phdr, sizeof(phdr));
1620        }
1621
1622        /* Page-align dumped data */
1623        dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1624
1625        /*
1626         * We must use the same mm_flags while dumping core to avoid
1627         * inconsistency between the program headers and bodies, otherwise an
1628         * unusable core file can be generated.
1629         */
1630        mm_flags = get_mm_flags(current->mm);
1631
1632        /* Write program headers for segments dump */
1633        for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1634                struct elf_phdr phdr;
1635
1636                phdr.p_type = PT_LOAD;
1637                phdr.p_offset = offset;
1638                phdr.p_vaddr = vma->vm_start;
1639                phdr.p_paddr = 0;
1640                phdr.p_filesz = vma_dump_size(vma, mm_flags);
1641                phdr.p_memsz = vma->vm_end - vma->vm_start;
1642                offset += phdr.p_filesz;
1643                phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
1644                if (vma->vm_flags & VM_WRITE) phdr.p_flags |= PF_W;
1645                if (vma->vm_flags & VM_EXEC) phdr.p_flags |= PF_X;
1646                phdr.p_align = ELF_EXEC_PAGESIZE;
1647
1648                DUMP_WRITE(&phdr, sizeof(phdr));
1649        }
1650
1651#ifdef ELF_CORE_WRITE_EXTRA_PHDRS
1652        ELF_CORE_WRITE_EXTRA_PHDRS;
1653#endif
1654
1655        /* write out the notes section */
1656        for (i = 0; i < numnote; i++)
1657                if (!writenote(notes + i, file))
1658                        goto end_coredump;
1659
1660        /* write out the thread status notes section */
1661        list_for_each(t, &thread_list) {
1662                struct elf_thread_status *tmp = list_entry(t, struct elf_thread_status, list);
1663                for (i = 0; i < tmp->num_notes; i++)
1664                        if (!writenote(&tmp->notes[i], file))
1665                                goto end_coredump;
1666        }
1667 
1668        DUMP_SEEK(dataoff);
1669
1670        for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1671                unsigned long addr;
1672                unsigned long end;
1673
1674                end = vma->vm_start + vma_dump_size(vma, mm_flags);
1675
1676                for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
1677                        struct page* page;
1678                        struct vm_area_struct *vma;
1679
1680                        if (get_user_pages(current, current->mm, addr, 1, 0, 1,
1681                                                &page, &vma) <= 0) {
1682                                DUMP_SEEK (file->f_pos + PAGE_SIZE);
1683                        } else {
1684                                if (page == ZERO_PAGE(addr)) {
1685                                        DUMP_SEEK (file->f_pos + PAGE_SIZE);
1686                                } else {
1687                                        void *kaddr;
1688                                        flush_cache_page(vma, addr);
1689                                        kaddr = kmap(page);
1690                                        if ((size += PAGE_SIZE) > limit ||
1691                                            !dump_write(file, kaddr,
1692                                            PAGE_SIZE)) {
1693                                                kunmap(page);
1694                                                page_cache_release(page);
1695                                                goto end_coredump;
1696                                        }
1697                                        kunmap(page);
1698                                }
1699                                page_cache_release(page);
1700                        }
1701                }
1702        }
1703
1704#ifdef ELF_CORE_WRITE_EXTRA_DATA
1705        ELF_CORE_WRITE_EXTRA_DATA;
1706#endif
1707
1708        if (file->f_pos != offset) {
1709                /* Sanity check */
1710                printk("elf_core_dump: file->f_pos (%Ld) != offset (%Ld)\n",
1711                       file->f_pos, offset);
1712        }
1713
1714end_coredump:
1715        set_fs(fs);
1716
1717cleanup:
1718        while(!list_empty(&thread_list)) {
1719                struct list_head *tmp = thread_list.next;
1720                list_del(tmp);
1721                kfree(list_entry(tmp, struct elf_thread_status, list));
1722        }
1723
1724        kfree(elf);
1725        kfree(prstatus);
1726        kfree(psinfo);
1727        kfree(notes);
1728        kfree(fpu);
1729#ifdef ELF_CORE_COPY_XFPREGS
1730        kfree(xfpu);
1731#endif
1732        return has_dumped;
1733#undef NUM_NOTES
1734}
1735
1736#endif          /* USE_ELF_CORE_DUMP */
1737
1738static int __init init_elf_binfmt(void)
1739{
1740        return register_binfmt(&elf_format);
1741}
1742
1743static void __exit exit_elf_binfmt(void)
1744{
1745        /* Remove the COFF and ELF loaders. */
1746        unregister_binfmt(&elf_format);
1747}
1748
/*
 * Wire the two functions above into the kernel's init/exit machinery:
 * init_elf_binfmt is handed to core_initcall() and exit_elf_binfmt to
 * module_exit().  The module is tagged with the "GPL" licence string.
 *
 * NOTE(review): core_initcall (rather than module_init) presumably makes
 * the ELF loader available early in boot -- confirm against linux/init.h.
 */
1749core_initcall(init_elf_binfmt);
1750module_exit(exit_elf_binfmt);
1751MODULE_LICENSE("GPL");
1752