RHEL5/fs/binfmt_elf.c
<<
>>
Prefs
   1/*
   2 * linux/fs/binfmt_elf.c
   3 *
   4 * These are the functions used to load ELF format executables as used
   5 * on SVr4 machines.  Information on the format may be found in the book
   6 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
   7 * Tools".
   8 *
   9 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
  10 */
  11
  12#include <linux/module.h>
  13#include <linux/kernel.h>
  14#include <linux/fs.h>
  15#include <linux/stat.h>
  16#include <linux/time.h>
  17#include <linux/mm.h>
  18#include <linux/mman.h>
  19#include <linux/a.out.h>
  20#include <linux/errno.h>
  21#include <linux/signal.h>
  22#include <linux/binfmts.h>
  23#include <linux/string.h>
  24#include <linux/file.h>
  25#include <linux/fcntl.h>
  26#include <linux/ptrace.h>
  27#include <linux/slab.h>
  28#include <linux/shm.h>
  29#include <linux/personality.h>
  30#include <linux/elfcore.h>
  31#include <linux/init.h>
  32#include <linux/highuid.h>
  33#include <linux/smp.h>
  34#include <linux/smp_lock.h>
  35#include <linux/compiler.h>
  36#include <linux/highmem.h>
  37#include <linux/pagemap.h>
  38#include <linux/security.h>
  39#include <linux/syscalls.h>
  40#include <linux/random.h>
  41#include <linux/elf.h>
  42#include <asm/uaccess.h>
  43#include <asm/param.h>
  44#include <asm/page.h>
  45
  /*
   * Forward declarations.  load_elf_binary() and load_elf_library() are
   * named in the elf_format initializer before their definitions appear;
   * dump_fpu() is only declared (extern) here.
   */
  46static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
  47static int load_elf_library(struct file *);
  48static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int, unsigned long);
  49extern int dump_fpu (struct pt_regs *, elf_fpregset_t *);
  50
  /*
   * Type of one slot written to the user stack / auxiliary vector.
   * Falls back to unsigned long when nothing earlier has defined it.
   */
  51#ifndef elf_addr_t
  52#define elf_addr_t unsigned long
  53#endif
  54
  55/*
  56 * If we don't support core dumping, then supply a NULL so we
  57 * don't even try.
  58 */
  59#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
  60static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file);
  61#else
  62#define elf_core_dump   NULL
  63#endif
  64
  /*
   * ELF_MIN_ALIGN is the larger of ELF_EXEC_PAGESIZE and PAGE_SIZE; the
   * ELF_PAGE* helpers in this file round addresses to this granularity.
   */
  65#if ELF_EXEC_PAGESIZE > PAGE_SIZE
  66#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
  67#else
  68#define ELF_MIN_ALIGN   PAGE_SIZE
  69#endif
  70
  /* Default ELF_CORE_EFLAGS to 0 when nothing earlier has defined it. */
  71#ifndef ELF_CORE_EFLAGS
  72#define ELF_CORE_EFLAGS 0
  73#endif
  74
  /*
   * Address helpers working at ELF_MIN_ALIGN granularity:
   *
   *   ELF_PAGESTART(v)  - round v down to the start of its page
   *   ELF_PAGEOFFSET(v) - offset of v inside its page
   *   ELF_PAGEALIGN(v)  - round v up to the next page boundary
   *
   * Both masks are widened to unsigned long before being inverted.  Without
   * the cast, an ELF_MIN_ALIGN of type unsigned int would yield a 32-bit
   * mask that is zero-extended and silently clears the upper half of a
   * 64-bit address.
   */
  #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
  #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
  #define ELF_PAGEALIGN(_v) \
          (((_v) + ELF_MIN_ALIGN - 1) & ~(unsigned long)(ELF_MIN_ALIGN - 1))
  78
  /*
   * Binary-format descriptor for ELF.  It names this file's loader
   * (load_elf_binary), shared-library loader (load_elf_library) and
   * core-dump handler; elf_core_dump is the NULL macro when core dumping
   * is compiled out.  min_coredump is set to ELF_EXEC_PAGESIZE.
   */
  79static struct linux_binfmt elf_format = {
  80                .module         = THIS_MODULE,
  81                .load_binary    = load_elf_binary,
  82                .load_shlib     = load_elf_library,
  83                .core_dump      = elf_core_dump,
  84                .min_coredump   = ELF_EXEC_PAGESIZE
  85};
  86
  /*
   * True when x, viewed as an unsigned long, is >= PAGE_MASK.  This file
   * applies it to the values returned by do_brk(), do_mmap() and elf_map()
   * to tell a usable address from a failure.
   * NOTE(review): presumably relies on errors being small negative values
   * that land in the last page of the address space - confirm.
   */
  87#define BAD_ADDR(x) ((unsigned long)(x) >= PAGE_MASK)
  88
  89static int set_brk(unsigned long start, unsigned long end)
  90{
  91        start = ELF_PAGEALIGN(start);
  92        end = ELF_PAGEALIGN(end);
  93        if (end > start) {
  94                unsigned long addr;
  95                down_write(&current->mm->mmap_sem);
  96                addr = do_brk(start, end - start);
  97                up_write(&current->mm->mmap_sem);
  98                if (BAD_ADDR(addr))
  99                        return addr;
 100        }
 101        current->mm->start_brk = current->mm->brk = end;
 102        return 0;
 103}
 104
 105/* We need to explicitly zero any fractional pages
 106   after the data section (i.e. bss).  This would
 107   contain the junk from the file that should not
 108   be in memory
 109 */
 110static int padzero(unsigned long elf_bss)
 111{
 112        unsigned long nbyte;
 113
 114        nbyte = ELF_PAGEOFFSET(elf_bss);
 115        if (nbyte) {
 116                nbyte = ELF_MIN_ALIGN - nbyte;
 117                if (clear_user((void __user *) elf_bss, nbyte))
 118                        return -EFAULT;
 119        }
 120        return 0;
 121}
 122
 /*
  * Let's use some macros to make this stack manipulation a little clearer.
  *
  *   STACK_ADD(sp, items)   - sp moved by 'items' elf_addr_t slots in the
  *                            direction the stack grows
  *   STACK_ROUND(sp, items) - address of sp after 'items' more slots,
  *                            rounded to a 16-byte boundary (as unsigned long)
  *   STACK_ALLOC(sp, len)   - reserve 'len' units on the stack by updating
  *                            sp in place; evaluates to the start of the
  *                            reserved area
  *
  * Every macro argument is parenthesized so that expression arguments such
  * as "a + b" are evaluated as a unit.
  */
 #ifdef CONFIG_STACK_GROWSUP
 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
 #define STACK_ROUND(sp, items) \
         ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
 #define STACK_ALLOC(sp, len) ({ \
         elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); (sp) += (len); \
         old_sp; })
 #else
 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
 #define STACK_ROUND(sp, items) \
         (((unsigned long) ((sp) - (items))) &~ 15UL)
 #define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
 #endif
 137
 138static int
 139create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
 140                int interp_aout, unsigned long load_addr,
 141                unsigned long interp_load_addr)
 142{
 143        unsigned long p = bprm->p;
 144        int argc = bprm->argc;
 145        int envc = bprm->envc;
 146        elf_addr_t __user *argv;
 147        elf_addr_t __user *envp;
 148        elf_addr_t __user *sp;
 149        elf_addr_t __user *u_platform;
 150        const char *k_platform = ELF_PLATFORM;
 151        int items;
 152        elf_addr_t *elf_info;
 153        int ei_index = 0;
 154        struct task_struct *tsk = current;
 155
 156        /*
 157         * If this architecture has a platform capability string, copy it
 158         * to userspace.  In some cases (Sparc), this info is impossible
 159         * for userspace to get any other way, in others (i386) it is
 160         * merely difficult.
 161         */
 162        u_platform = NULL;
 163        if (k_platform) {
 164                size_t len = strlen(k_platform) + 1;
 165
 166                /*
 167                 * In some cases (e.g. Hyper-Threading), we want to avoid L1
 168                 * evictions by the processes running on the same package. One
 169                 * thing we can do is to shuffle the initial stack for them.
 170                 */
 171
 172                p = arch_align_stack(p);
 173
 174                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
 175                if (__copy_to_user(u_platform, k_platform, len))
 176                        return -EFAULT;
 177        }
 178
 179        /* Create the ELF interpreter info */
 180        elf_info = (elf_addr_t *)current->mm->saved_auxv;
 181#define NEW_AUX_ENT(id, val) \
 182        do { \
 183                elf_info[ei_index++] = id; \
 184                elf_info[ei_index++] = val; \
 185        } while (0)
 186
 187#ifdef ARCH_DLINFO
 188        /* 
 189         * ARCH_DLINFO must come first so PPC can do its special alignment of
 190         * AUXV.
 191         */
 192        ARCH_DLINFO;
 193#endif
 194        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
 195        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
 196        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
 197        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
 198        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
 199        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
 200        NEW_AUX_ENT(AT_BASE, interp_load_addr);
 201        NEW_AUX_ENT(AT_FLAGS, 0);
 202        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
 203        NEW_AUX_ENT(AT_UID, tsk->uid);
 204        NEW_AUX_ENT(AT_EUID, tsk->euid);
 205        NEW_AUX_ENT(AT_GID, tsk->gid);
 206        NEW_AUX_ENT(AT_EGID, tsk->egid);
 207        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
 208        if (k_platform) {
 209                NEW_AUX_ENT(AT_PLATFORM,
 210                            (elf_addr_t)(unsigned long)u_platform);
 211        }
 212        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
 213                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
 214        }
 215#undef NEW_AUX_ENT
 216        /* AT_NULL is zero; clear the rest too */
 217        memset(&elf_info[ei_index], 0,
 218               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
 219
 220        /* And advance past the AT_NULL entry.  */
 221        ei_index += 2;
 222
 223        sp = STACK_ADD(p, ei_index);
 224
 225        items = (argc + 1) + (envc + 1);
 226        if (interp_aout) {
 227                items += 3; /* a.out interpreters require argv & envp too */
 228        } else {
 229                items += 1; /* ELF interpreters only put argc on the stack */
 230        }
 231        bprm->p = STACK_ROUND(sp, items);
 232
 233        /* Point sp at the lowest address on the stack */
 234#ifdef CONFIG_STACK_GROWSUP
 235        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
 236        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
 237#else
 238        sp = (elf_addr_t __user *)bprm->p;
 239#endif
 240
 241        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
 242        if (__put_user(argc, sp++))
 243                return -EFAULT;
 244        if (interp_aout) {
 245                argv = sp + 2;
 246                envp = argv + argc + 1;
 247                __put_user((elf_addr_t)(unsigned long)argv, sp++);
 248                __put_user((elf_addr_t)(unsigned long)envp, sp++);
 249        } else {
 250                argv = sp;
 251                envp = argv + argc + 1;
 252        }
 253
 254        /* Populate argv and envp */
 255        p = current->mm->arg_end = current->mm->arg_start;
 256        while (argc-- > 0) {
 257                size_t len;
 258                __put_user((elf_addr_t)p, argv++);
 259                len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
 260                if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
 261                        return 0;
 262                p += len;
 263        }
 264        if (__put_user(0, argv))
 265                return -EFAULT;
 266        current->mm->arg_end = current->mm->env_start = p;
 267        while (envc-- > 0) {
 268                size_t len;
 269                __put_user((elf_addr_t)p, envp++);
 270                len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
 271                if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
 272                        return 0;
 273                p += len;
 274        }
 275        if (__put_user(0, envp))
 276                return -EFAULT;
 277        current->mm->env_end = p;
 278
 279        /* Put the elf_info on the stack in the right place.  */
 280        sp = (elf_addr_t __user *)envp + 1;
 281        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
 282                return -EFAULT;
 283        return 0;
 284}
 285
 286#ifndef elf_map
 287
 288static unsigned long elf_map(struct file *filep, unsigned long addr,
 289                struct elf_phdr *eppnt, int prot, int type,
 290                unsigned long total_size)
 291{
 292        unsigned long map_addr;
 293        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
 294        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
 295
 296        addr = ELF_PAGESTART(addr);
 297        size = ELF_PAGEALIGN(size);
 298
 299        /* mmap() will return -EINVAL if given a zero size, but a
 300         * segment with zero filesize is perfectly valid */
 301        if (!size)
 302                return addr;
 303
 304        down_write(&current->mm->mmap_sem);
 305        /*
 306        * total_size is the size of the ELF (interpreter) image.
 307        * The _first_ mmap needs to know the full size, otherwise
 308        * randomization might put this image into an overlapping
 309        * position with the ELF binary image. (since size < total_size)
 310        * So we first map the 'big' image - and unmap the remainder at
 311        * the end. (which unmap is needed for ELF images with holes.)
 312        */
 313        if (total_size) {
 314                total_size = ELF_PAGEALIGN(total_size);
 315                map_addr = do_mmap(filep, addr, total_size, prot, type, off);
 316                if (!BAD_ADDR(map_addr))
 317                        do_munmap(current->mm, map_addr+size, total_size-size);
 318        } else
 319                map_addr = do_mmap(filep, addr, size, prot, type, off);
 320
 321        up_write(&current->mm->mmap_sem);
 322        return(map_addr);
 323}
 324
 325#endif /* !elf_map */
 326
 327static inline unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
 328{
 329        int i, first_idx = -1, last_idx = -1;
 330
 331        for (i = 0; i < nr; i++)
 332                if (cmds[i].p_type == PT_LOAD) {
 333                        last_idx = i;
 334                        if (first_idx == -1)
 335                                first_idx = i;
 336                }
 337
 338        if (first_idx == -1)
 339                return 0;
 340
 341        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
 342                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
 343}
 344
 345
 346/* This is much more generalized than the library routine read function,
 347   so we keep this separate.  Technically the library read function
 348   is only provided so that we can read a.out libraries that have
 349   an ELF header */
 350
 351static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
 352                struct file *interpreter, unsigned long *interp_map_addr,
 353                unsigned long no_base)
 354{
 355        struct elf_phdr *elf_phdata;
 356        struct elf_phdr *eppnt;
 357        unsigned long load_addr = 0;
 358        int load_addr_set = 0;
 359        unsigned long last_bss = 0, elf_bss = 0;
 360        unsigned long error = ~0UL;
 361        unsigned long total_size;
 362        int retval, i, size;
 363
 364        /* First of all, some simple consistency checks */
 365        if (interp_elf_ex->e_type != ET_EXEC &&
 366            interp_elf_ex->e_type != ET_DYN)
 367                goto out;
 368        if (!elf_check_arch(interp_elf_ex))
 369                goto out;
 370        if (!interpreter->f_op || !interpreter->f_op->mmap)
 371                goto out;
 372
 373        /*
 374         * If the size of this structure has changed, then punt, since
 375         * we will be doing the wrong thing.
 376         */
 377        if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
 378                goto out;
 379        if (interp_elf_ex->e_phnum < 1 ||
 380                interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
 381                goto out;
 382
 383        /* Now read in all of the header information */
 384        size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
 385        if (size > ELF_MIN_ALIGN)
 386                goto out;
 387        elf_phdata = kmalloc(size, GFP_KERNEL);
 388        if (!elf_phdata)
 389                goto out;
 390
 391        retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
 392                             (char *)elf_phdata,size);
 393        error = -EIO;
 394        if (retval != size) {
 395                if (retval < 0)
 396                        error = retval; 
 397                goto out_close;
 398        }
 399
 400        total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
 401        if (!total_size)
 402                goto out_close;
 403
 404        eppnt = elf_phdata;
 405        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
 406                if (eppnt->p_type == PT_LOAD) {
 407                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
 408                        int elf_prot = 0;
 409                        unsigned long vaddr = 0;
 410                        unsigned long k, map_addr;
 411
 412                        if (eppnt->p_flags & PF_R)
 413                                elf_prot = PROT_READ;
 414                        if (eppnt->p_flags & PF_W)
 415                                elf_prot |= PROT_WRITE;
 416                        if (eppnt->p_flags & PF_X)
 417                                elf_prot |= PROT_EXEC;
 418                        vaddr = eppnt->p_vaddr;
 419                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
 420                                elf_type |= MAP_FIXED;
 421                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
 422                                load_addr = -vaddr;
 423
 424                        map_addr = elf_map(interpreter, load_addr + vaddr,
 425                                           eppnt, elf_prot, elf_type, total_size);
 426                        total_size = 0;
 427                        if (!*interp_map_addr)
 428                                *interp_map_addr = map_addr;
 429                        error = map_addr;
 430                        if (BAD_ADDR(map_addr))
 431                                goto out_close;
 432
 433                        if (!load_addr_set &&
 434                            interp_elf_ex->e_type == ET_DYN) {
 435                                load_addr = map_addr - ELF_PAGESTART(vaddr);
 436                                load_addr_set = 1;
 437                        }
 438
 439                        /*
 440                         * Check to see if the section's size will overflow the
 441                         * allowed task size. Note that p_filesz must always be
 442                         * <= p_memsize so it's only necessary to check p_memsz.
 443                         */
 444                        k = load_addr + eppnt->p_vaddr;
 445                        if (BAD_ADDR(k) ||
 446                            eppnt->p_filesz > eppnt->p_memsz ||
 447                            eppnt->p_memsz > TASK_SIZE ||
 448                            TASK_SIZE - eppnt->p_memsz < k) {
 449                                error = -ENOMEM;
 450                                goto out_close;
 451                        }
 452
 453                        /*
 454                         * Find the end of the file mapping for this phdr, and
 455                         * keep track of the largest address we see for this.
 456                         */
 457                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
 458                        if (k > elf_bss)
 459                                elf_bss = k;
 460
 461                        /*
 462                         * Do the same thing for the memory mapping - between
 463                         * elf_bss and last_bss is the bss section.
 464                         */
 465                        k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
 466                        if (k > last_bss)
 467                                last_bss = k;
 468                }
 469        }
 470
 471        /*
 472         * Now fill out the bss section.  First pad the last page up
 473         * to the page boundary, and then perform a mmap to make sure
 474         * that there are zero-mapped pages up to and including the 
 475         * last bss page.
 476         */
 477        if (padzero(elf_bss)) {
 478                error = -EFAULT;
 479                goto out_close;
 480        }
 481
 482        /* What we have mapped so far */
 483        elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
 484
 485        /* Map the last of the bss segment */
 486        if (last_bss > elf_bss) {
 487                down_write(&current->mm->mmap_sem);
 488                error = do_brk(elf_bss, last_bss - elf_bss);
 489                up_write(&current->mm->mmap_sem);
 490                if (BAD_ADDR(error))
 491                        goto out_close;
 492        }
 493
 494        error = load_addr;
 495
 496out_close:
 497        kfree(elf_phdata);
 498out:
 499        return error;
 500}
 501
 502static unsigned long load_aout_interp(struct exec *interp_ex,
 503                struct file *interpreter)
 504{
 505        unsigned long text_data, elf_entry = ~0UL;
 506        char __user * addr;
 507        loff_t offset;
 508
 509        current->mm->end_code = interp_ex->a_text;
 510        text_data = interp_ex->a_text + interp_ex->a_data;
 511        current->mm->end_data = text_data;
 512        current->mm->brk = interp_ex->a_bss + text_data;
 513
 514        switch (N_MAGIC(*interp_ex)) {
 515        case OMAGIC:
 516                offset = 32;
 517                addr = (char __user *)0;
 518                break;
 519        case ZMAGIC:
 520        case QMAGIC:
 521                offset = N_TXTOFF(*interp_ex);
 522                addr = (char __user *)N_TXTADDR(*interp_ex);
 523                break;
 524        default:
 525                goto out;
 526        }
 527
 528        down_write(&current->mm->mmap_sem);     
 529        do_brk(0, text_data);
 530        up_write(&current->mm->mmap_sem);
 531        if (!interpreter->f_op || !interpreter->f_op->read)
 532                goto out;
 533        if (interpreter->f_op->read(interpreter, addr, text_data, &offset) < 0)
 534                goto out;
 535        flush_icache_range((unsigned long)addr,
 536                           (unsigned long)addr + text_data);
 537
 538        down_write(&current->mm->mmap_sem);     
 539        do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1),
 540                interp_ex->a_bss);
 541        up_write(&current->mm->mmap_sem);
 542        elf_entry = interp_ex->a_entry;
 543
 544out:
 545        return elf_entry;
 546}
 547
 548/*
 549 * These are the functions used to load ELF style executables and shared
 550 * libraries.  There is no binary dependent code anywhere else.
 551 */
 552
 /*
  * Interpreter format flags.  load_elf_binary() combines AOUT and ELF as a
  * bit mask while probing the interpreter header and then narrows the mask
  * to a single value.
  */
 553#define INTERPRETER_NONE 0
 554#define INTERPRETER_AOUT 1
 555#define INTERPRETER_ELF 2
 556
 /*
  * Mask applied to the random value in randomize_stack_top() before it is
  * shifted left by PAGE_SHIFT; used only when nothing earlier defined it.
  */
 557#ifndef STACK_RND_MASK
 558#define STACK_RND_MASK 0x7ff            /* with 4K pages 8MB of VA */
 559#endif
 560
 561static unsigned long randomize_stack_top(unsigned long stack_top)
 562{
 563        unsigned int random_variable = 0;
 564
 565        if (current->flags & PF_RANDOMIZE) {
 566                random_variable = get_random_int() & STACK_RND_MASK;
 567                random_variable <<= PAGE_SHIFT;
 568        }
 569#ifdef CONFIG_STACK_GROWSUP
 570        return PAGE_ALIGN(stack_top) + random_variable;
 571#else
 572        return PAGE_ALIGN(stack_top) - random_variable;
 573#endif
 574}
 575
 576static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 577{
 578        struct file *interpreter = NULL; /* to shut gcc up */
 579        unsigned long load_addr = 0, load_bias = 0;
 580        int load_addr_set = 0;
 581        char * elf_interpreter = NULL;
 582        unsigned int interpreter_type = INTERPRETER_NONE;
 583        unsigned char ibcs2_interpreter = 0;
 584        unsigned long error;
 585        struct elf_phdr *elf_ppnt, *elf_phdata;
 586        unsigned long elf_bss, elf_brk;
 587        int elf_exec_fileno;
 588        int retval, i;
 589        unsigned int size;
 590        unsigned long elf_entry, interp_load_addr = 0, interp_map_addr = 0;
 591        unsigned long start_code, end_code, start_data, end_data;
 592        unsigned long reloc_func_desc = 0;
 593        char passed_fileno[6];
 594        struct files_struct *files;
 595        int have_pt_gnu_stack, executable_stack;
 596        unsigned long def_flags = 0;
 597        struct {
 598                struct elfhdr elf_ex;
 599                struct elfhdr interp_elf_ex;
 600                struct exec interp_ex;
 601        } *loc;
 602
 603        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
 604        if (!loc) {
 605                retval = -ENOMEM;
 606                goto out_ret;
 607        }
 608        
 609        /* Get the exec-header */
 610        loc->elf_ex = *((struct elfhdr *)bprm->buf);
 611
 612        retval = -ENOEXEC;
 613        /* First of all, some simple consistency checks */
 614        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
 615                goto out;
 616
 617        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
 618                goto out;
 619        if (!elf_check_arch(&loc->elf_ex))
 620                goto out;
 621        if (!bprm->file->f_op||!bprm->file->f_op->mmap)
 622                goto out;
 623
 624        /* Now read in all of the header information */
 625        if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
 626                goto out;
 627        if (loc->elf_ex.e_phnum < 1 ||
 628                loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
 629                goto out;
 630        size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
 631        retval = -ENOMEM;
 632        elf_phdata = kmalloc(size, GFP_KERNEL);
 633        if (!elf_phdata)
 634                goto out;
 635
 636        retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
 637                             (char *)elf_phdata, size);
 638        if (retval != size) {
 639                if (retval >= 0)
 640                        retval = -EIO;
 641                goto out_free_ph;
 642        }
 643
 644        files = current->files; /* Refcounted so ok */
 645        retval = unshare_files();
 646        if (retval < 0)
 647                goto out_free_ph;
 648        if (files == current->files) {
 649                put_files_struct(files);
 650                files = NULL;
 651        }
 652
 653        /* exec will make our files private anyway, but for the a.out
 654           loader stuff we need to do it earlier */
 655        retval = get_unused_fd();
 656        if (retval < 0)
 657                goto out_free_fh;
 658        get_file(bprm->file);
 659        fd_install(elf_exec_fileno = retval, bprm->file);
 660
 661        elf_ppnt = elf_phdata;
 662        elf_bss = 0;
 663        elf_brk = 0;
 664
 665        start_code = ~0UL;
 666        end_code = 0;
 667        start_data = 0;
 668        end_data = 0;
 669
 670        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
 671                if (elf_ppnt->p_type == PT_INTERP) {
 672                        /* This is the program interpreter used for
 673                         * shared libraries - for now assume that this
 674                         * is an a.out format binary
 675                         */
 676                        retval = -ENOEXEC;
 677                        if (elf_ppnt->p_filesz > PATH_MAX || 
 678                            elf_ppnt->p_filesz < 2)
 679                                goto out_free_file;
 680
 681                        retval = -ENOMEM;
 682                        elf_interpreter = kmalloc(elf_ppnt->p_filesz,
 683                                                  GFP_KERNEL);
 684                        if (!elf_interpreter)
 685                                goto out_free_file;
 686
 687                        retval = kernel_read(bprm->file, elf_ppnt->p_offset,
 688                                             elf_interpreter,
 689                                             elf_ppnt->p_filesz);
 690                        if (retval != elf_ppnt->p_filesz) {
 691                                if (retval >= 0)
 692                                        retval = -EIO;
 693                                goto out_free_interp;
 694                        }
 695                        /* make sure path is NULL terminated */
 696                        retval = -ENOEXEC;
 697                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
 698                                goto out_free_interp;
 699
 700                        /* If the program interpreter is one of these two,
 701                         * then assume an iBCS2 image. Otherwise assume
 702                         * a native linux image.
 703                         */
 704                        if (strcmp(elf_interpreter,"/usr/lib/libc.so.1") == 0 ||
 705                            strcmp(elf_interpreter,"/usr/lib/ld.so.1") == 0)
 706                                ibcs2_interpreter = 1;
 707
 708                        /*
 709                         * The early SET_PERSONALITY here is so that the lookup
 710                         * for the interpreter happens in the namespace of the 
 711                         * to-be-execed image.  SET_PERSONALITY can select an
 712                         * alternate root.
 713                         *
 714                         * However, SET_PERSONALITY is NOT allowed to switch
 715                         * this task into the new images's memory mapping
 716                         * policy - that is, TASK_SIZE must still evaluate to
 717                         * that which is appropriate to the execing application.
 718                         * This is because exit_mmap() needs to have TASK_SIZE
 719                         * evaluate to the size of the old image.
 720                         *
 721                         * So if (say) a 64-bit application is execing a 32-bit
 722                         * application it is the architecture's responsibility
 723                         * to defer changing the value of TASK_SIZE until the
 724                         * switch really is going to happen - do this in
 725                         * flush_thread().      - akpm
 726                         */
 727                        SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
 728
 729                        interpreter = open_exec(elf_interpreter);
 730                        retval = PTR_ERR(interpreter);
 731                        if (IS_ERR(interpreter))
 732                                goto out_free_interp;
 733                        retval = kernel_read(interpreter, 0, bprm->buf,
 734                                             BINPRM_BUF_SIZE);
 735                        if (retval != BINPRM_BUF_SIZE) {
 736                                if (retval >= 0)
 737                                        retval = -EIO;
 738                                goto out_free_dentry;
 739                        }
 740
 741                        /* Get the exec headers */
 742                        loc->interp_ex = *((struct exec *)bprm->buf);
 743                        loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
 744                        break;
 745                }
 746                elf_ppnt++;
 747        }
 748
 749        elf_ppnt = elf_phdata;
 750        executable_stack = EXSTACK_DEFAULT;
 751
 752        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
 753                if (elf_ppnt->p_type == PT_GNU_STACK) {
 754                        if (elf_ppnt->p_flags & PF_X)
 755                                executable_stack = EXSTACK_ENABLE_X;
 756                        else
 757                                executable_stack = EXSTACK_DISABLE_X;
 758                        break;
 759                }
 760        have_pt_gnu_stack = (i < loc->elf_ex.e_phnum);
 761
 762        if (current->personality == PER_LINUX && (exec_shield & 2)) {
 763                executable_stack = EXSTACK_DISABLE_X;
 764                current->flags |= PF_RANDOMIZE;
 765        }
 766
 767        /* Some simple consistency checks for the interpreter */
 768        if (elf_interpreter) {
 769                interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;
 770
 771                /* Now figure out which format our binary is */
 772                if ((N_MAGIC(loc->interp_ex) != OMAGIC) &&
 773                    (N_MAGIC(loc->interp_ex) != ZMAGIC) &&
 774                    (N_MAGIC(loc->interp_ex) != QMAGIC))
 775                        interpreter_type = INTERPRETER_ELF;
 776
 777                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
 778                        interpreter_type &= ~INTERPRETER_ELF;
 779
 780                retval = -ELIBBAD;
 781                if (!interpreter_type)
 782                        goto out_free_dentry;
 783
 784                /* Make sure only one type was selected */
 785                if ((interpreter_type & INTERPRETER_ELF) &&
 786                     interpreter_type != INTERPRETER_ELF) {
 787                        // FIXME - ratelimit this before re-enabling
 788                        // printk(KERN_WARNING "ELF: Ambiguous type, using ELF\n");
 789                        interpreter_type = INTERPRETER_ELF;
 790                }
 791                /* Verify the interpreter has a valid arch */
 792                if ((interpreter_type == INTERPRETER_ELF) &&
 793                    !elf_check_arch(&loc->interp_elf_ex))
 794                        goto out_free_dentry;
 795        } else {
 796                /* Executables without an interpreter also need a personality  */
 797                SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
 798        }
 799
 800        /* OK, we are done with that, now set up the arg stuff,
 801           and then start this sucker up */
 802        if ((!bprm->sh_bang) && (interpreter_type == INTERPRETER_AOUT)) {
 803                char *passed_p = passed_fileno;
 804                sprintf(passed_fileno, "%d", elf_exec_fileno);
 805
 806                if (elf_interpreter) {
 807                        retval = copy_strings_kernel(1, &passed_p, bprm);
 808                        if (retval)
 809                                goto out_free_dentry; 
 810                        bprm->argc++;
 811                }
 812        }
 813
 814        /* Flush all traces of the currently running executable */
 815        retval = flush_old_exec(bprm);
 816        if (retval)
 817                goto out_free_dentry;
 818
 819#ifdef __i386__
 820        /*
 821         * Turn off the CS limit completely if exec-shield disabled or
 822         * NX active:
 823         */
 824        if (!exec_shield || executable_stack != EXSTACK_DISABLE_X || nx_enabled)
 825                arch_add_exec_range(current->mm, -1);
 826#endif
 827
 828        /* Discard our unneeded old files struct */
 829        if (files) {
 830                put_files_struct(files);
 831                files = NULL;
 832        }
 833
 834        /* OK, This is the point of no return */
 835        current->mm->start_data = 0;
 836        current->mm->end_data = 0;
 837        current->mm->end_code = 0;
 838        current->mm->mmap = NULL;
 839        current->flags &= ~PF_FORKNOEXEC;
 840        current->mm->def_flags = def_flags;
 841
 842        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
 843           may depend on the personality.  */
 844        SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
 845        if (!(exec_shield & 2) &&
 846                        elf_read_implies_exec(loc->elf_ex, executable_stack))
 847                current->personality |= READ_IMPLIES_EXEC;
 848
 849        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
 850                current->flags |= PF_RANDOMIZE;
 851        arch_pick_mmap_layout(current->mm);
 852
 853        /* Do this so that we can load the interpreter, if need be.  We will
 854           change some of these later */
 855        current->mm->free_area_cache = current->mm->mmap_base;
 856        current->mm->cached_hole_size = 0;
 857        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
 858                                 executable_stack);
 859        if (retval < 0) {
 860                send_sig(SIGKILL, current, 0);
 861                goto out_free_dentry;
 862        }
 863        
 864        current->mm->start_stack = bprm->p;
 865
 866        /* Now we do a little grungy work by mmaping the ELF image into
 867           the correct location in memory.
 868         */
 869        for(i = 0, elf_ppnt = elf_phdata;
 870            i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
 871                int elf_prot = 0, elf_flags;
 872                unsigned long k, vaddr;
 873
 874                if (elf_ppnt->p_type != PT_LOAD)
 875                        continue;
 876
 877                if (unlikely (elf_brk > elf_bss)) {
 878                        unsigned long nbyte;
 879                    
 880                        /* There was a PT_LOAD segment with p_memsz > p_filesz
 881                           before this one. Map anonymous pages, if needed,
 882                           and clear the area.  */
 883                        retval = set_brk (elf_bss + load_bias,
 884                                          elf_brk + load_bias);
 885                        if (retval) {
 886                                send_sig(SIGKILL, current, 0);
 887                                goto out_free_dentry;
 888                        }
 889                        nbyte = ELF_PAGEOFFSET(elf_bss);
 890                        if (nbyte) {
 891                                nbyte = ELF_MIN_ALIGN - nbyte;
 892                                if (nbyte > elf_brk - elf_bss)
 893                                        nbyte = elf_brk - elf_bss;
 894                                if (clear_user((void __user *)elf_bss +
 895                                                        load_bias, nbyte)) {
 896                                        /*
 897                                         * This bss-zeroing can fail if the ELF
 898                                         * file specifies odd protections. So
 899                                         * we don't check the return value
 900                                         */
 901                                }
 902                        }
 903                }
 904
 905                if (elf_ppnt->p_flags & PF_R)
 906                        elf_prot |= PROT_READ;
 907                if (elf_ppnt->p_flags & PF_W)
 908                        elf_prot |= PROT_WRITE;
 909                if (elf_ppnt->p_flags & PF_X)
 910                        elf_prot |= PROT_EXEC;
 911
 912                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
 913
 914                vaddr = elf_ppnt->p_vaddr;
 915                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set)
 916                        elf_flags |= MAP_FIXED;
 917                else if (loc->elf_ex.e_type == ET_DYN)
 918#ifdef __i386__
 919                        load_bias = 0;
 920#else
 921                        load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
 922#endif
 923
 924                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
 925                                elf_prot, elf_flags, 0);
 926                if (BAD_ADDR(error)) {
 927                        send_sig(SIGKILL, current, 0);
 928                        goto out_free_dentry;
 929                }
 930
 931                if (!load_addr_set) {
 932                        load_addr_set = 1;
 933                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
 934                        if (loc->elf_ex.e_type == ET_DYN) {
 935                                load_bias += error -
 936                                             ELF_PAGESTART(load_bias + vaddr);
 937                                load_addr += load_bias;
 938                                reloc_func_desc = load_bias;
 939                        }
 940                }
 941                k = elf_ppnt->p_vaddr;
 942                if (k < start_code)
 943                        start_code = k;
 944                if (start_data < k)
 945                        start_data = k;
 946
 947                /*
 948                 * Check to see if the section's size will overflow the
 949                 * allowed task size. Note that p_filesz must always be
 950                 * <= p_memsz so it is only necessary to check p_memsz.
 951                 */
 952                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
 953                    elf_ppnt->p_memsz > TASK_SIZE ||
 954                    TASK_SIZE - elf_ppnt->p_memsz < k) {
 955                        /* set_brk can never work. Avoid overflows. */
 956                        send_sig(SIGKILL, current, 0);
 957                        goto out_free_dentry;
 958                }
 959
 960                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
 961
 962                if (k > elf_bss)
 963                        elf_bss = k;
 964                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
 965                        end_code = k;
 966                if (end_data < k)
 967                        end_data = k;
 968                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
 969                if (k > elf_brk)
 970                        elf_brk = k;
 971        }
 972
 973        loc->elf_ex.e_entry += load_bias;
 974        elf_bss += load_bias;
 975        elf_brk += load_bias;
 976        start_code += load_bias;
 977        end_code += load_bias;
 978        start_data += load_bias;
 979        end_data += load_bias;
 980
 981        /* Calling set_brk effectively mmaps the pages that we need
 982         * for the bss and break sections.  We must do this before
 983         * mapping in the interpreter, to make sure it doesn't wind
 984         * up getting placed where the bss needs to go.
 985         */
 986        retval = set_brk(elf_bss, elf_brk);
 987        if (retval) {
 988                send_sig(SIGKILL, current, 0);
 989                goto out_free_dentry;
 990        }
 991        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
 992                send_sig(SIGSEGV, current, 0);
 993                retval = -EFAULT; /* Nobody gets to see this, but.. */
 994                goto out_free_dentry;
 995        }
 996
 997        if (elf_interpreter) {
 998                if (interpreter_type == INTERPRETER_AOUT)
 999                        elf_entry = load_aout_interp(&loc->interp_ex,
1000                                                     interpreter);
1001                else {
1002                        elf_entry = load_elf_interp(&loc->interp_elf_ex,
1003                                                    interpreter,
1004                                                    &interp_map_addr,
1005                                                    load_bias);
1006                        if (!BAD_ADDR(elf_entry)) {
1007                                /* load_elf_interp() returns relocation adjustment */
1008                                interp_load_addr = elf_entry;
1009                                elf_entry += loc->interp_elf_ex.e_entry;
1010                        }
1011                }
1012                if (BAD_ADDR(elf_entry)) {
1013                        force_sig(SIGSEGV, current);
1014                        retval = IS_ERR((void *)elf_entry) ?
1015                                        (int)elf_entry : -EINVAL;
1016                        goto out_free_dentry;
1017                }
1018                reloc_func_desc = interp_load_addr;
1019
1020                allow_write_access(interpreter);
1021                fput(interpreter);
1022                kfree(elf_interpreter);
1023        } else {
1024                elf_entry = loc->elf_ex.e_entry;
1025                if (BAD_ADDR(elf_entry)) {
1026                        force_sig(SIGSEGV, current);
1027                        retval = -EINVAL;
1028                        goto out_free_dentry;
1029                }
1030        }
1031
1032        if (interpreter_type != INTERPRETER_AOUT)
1033                sys_close(elf_exec_fileno);
1034
1035        set_binfmt(&elf_format);
1036
1037#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1038        retval = arch_setup_additional_pages(bprm, executable_stack,
1039                        start_code, interp_map_addr);
1040        if (retval < 0) {
1041                send_sig(SIGKILL, current, 0);
1042                goto out_free_fh;
1043        }
1044#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1045
1046        kfree(elf_phdata);
1047
1048        compute_creds(bprm);
1049        current->flags &= ~PF_FORKNOEXEC;
1050        create_elf_tables(bprm, &loc->elf_ex,
1051                          (interpreter_type == INTERPRETER_AOUT),
1052                          load_addr, interp_load_addr);
1053        /* N.B. passed_fileno might not be initialized? */
1054        if (interpreter_type == INTERPRETER_AOUT)
1055                current->mm->arg_start += strlen(passed_fileno) + 1;
1056        current->mm->end_code = end_code;
1057        current->mm->start_code = start_code;
1058        current->mm->start_data = start_data;
1059        current->mm->end_data = end_data;
1060        current->mm->start_stack = bprm->p;
1061
1062#ifdef __HAVE_ARCH_RANDOMIZE_BRK
1063        if (current->flags & PF_RANDOMIZE)
1064                randomize_brk(elf_brk);
1065#endif
1066        if (current->personality & MMAP_PAGE_ZERO) {
1067                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
1068                   and some applications "depend" upon this behavior.
1069                   Since we do not have the power to recompile these, we
1070                   emulate the SVr4 behavior. Sigh. */
1071                down_write(&current->mm->mmap_sem);
1072                error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1073                                MAP_FIXED | MAP_PRIVATE, 0);
1074                up_write(&current->mm->mmap_sem);
1075        }
1076
1077#ifdef ELF_PLAT_INIT
1078        /*
1079         * The ABI may specify that certain registers be set up in special
1080         * ways (on i386 %edx is the address of a DT_FINI function, for
1081         * example.  In addition, it may also specify (eg, PowerPC64 ELF)
1082         * that the e_entry field is the address of the function descriptor
1083         * for the startup routine, rather than the address of the startup
1084         * routine itself.  This macro performs whatever initialization to
1085         * the regs structure is required as well as any relocations to the
1086         * function descriptor entries when executing dynamically links apps.
1087         */
1088        ELF_PLAT_INIT(regs, reloc_func_desc);
1089#endif
1090
1091        start_thread(regs, elf_entry, bprm->p);
1092        retval = 0;
1093out:
1094        kfree(loc);
1095out_ret:
1096        return retval;
1097
1098        /* error cleanup */
1099out_free_dentry:
1100        allow_write_access(interpreter);
1101        if (interpreter)
1102                fput(interpreter);
1103out_free_interp:
1104        kfree(elf_interpreter);
1105out_free_file:
1106        sys_close(elf_exec_fileno);
1107out_free_fh:
1108        if (files) {
1109                put_files_struct(current->files);
1110                current->files = files;
1111        }
1112out_free_ph:
1113        kfree(elf_phdata);
1114        goto out;
1115}
1116
1117/* This is really simpleminded and specialized - we are loading an
1118   a.out library that is given an ELF header. */
1119static int load_elf_library(struct file *file)
1120{
1121        struct elf_phdr *elf_phdata;
1122        struct elf_phdr *eppnt;
1123        unsigned long elf_bss, bss, len;
1124        int retval, error, i, j;
1125        struct elfhdr elf_ex;
1126
1127        error = -ENOEXEC;
1128        retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1129        if (retval != sizeof(elf_ex))
1130                goto out;
1131
1132        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1133                goto out;
1134
1135        /* First of all, some simple consistency checks */
1136        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1137            !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1138                goto out;
1139
1140        /* Now read in all of the header information */
1141
1142        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1143        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1144
1145        error = -ENOMEM;
1146        elf_phdata = kmalloc(j, GFP_KERNEL);
1147        if (!elf_phdata)
1148                goto out;
1149
1150        eppnt = elf_phdata;
1151        error = -ENOEXEC;
1152        retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1153        if (retval != j)
1154                goto out_free_ph;
1155
1156        for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1157                if ((eppnt + i)->p_type == PT_LOAD)
1158                        j++;
1159        if (j != 1)
1160                goto out_free_ph;
1161
1162        while (eppnt->p_type != PT_LOAD)
1163                eppnt++;
1164
1165        /* Now use mmap to map the library into memory. */
1166        down_write(&current->mm->mmap_sem);
1167        error = do_mmap(file,
1168                        ELF_PAGESTART(eppnt->p_vaddr),
1169                        (eppnt->p_filesz +
1170                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
1171                        PROT_READ | PROT_WRITE | PROT_EXEC,
1172                        MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1173                        (eppnt->p_offset -
1174                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
1175        up_write(&current->mm->mmap_sem);
1176        if (error != ELF_PAGESTART(eppnt->p_vaddr))
1177                goto out_free_ph;
1178
1179        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1180        if (padzero(elf_bss)) {
1181                error = -EFAULT;
1182                goto out_free_ph;
1183        }
1184
1185        len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1186                            ELF_MIN_ALIGN - 1);
1187        bss = eppnt->p_memsz + eppnt->p_vaddr;
1188        if (bss > len) {
1189                down_write(&current->mm->mmap_sem);
1190                do_brk(len, bss - len);
1191                up_write(&current->mm->mmap_sem);
1192        }
1193        error = 0;
1194
1195out_free_ph:
1196        kfree(elf_phdata);
1197out:
1198        return error;
1199}
1200
1201/*
1202 * Note that some platforms still use traditional core dumps and not
1203 * the ELF core dump.  Each platform can select it as appropriate.
1204 */
1205#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
1206
1207/*
1208 * ELF core dumper
1209 *
1210 * Modelled on fs/exec.c:aout_core_dump()
1211 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1212 */
1213/*
1214 * These are the only things you should do on a core-file: use only these
1215 * functions to write out all the necessary info.
1216 */
1217static int dump_write(struct file *file, const void *addr, int nr)
1218{
1219        return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
1220}
1221
1222static int dump_seek(struct file *file, loff_t off)
1223{
1224        if (file->f_op->llseek) {
1225                if (file->f_op->llseek(file, off, 0) != off)
1226                        return 0;
1227        } else
1228                file->f_pos = off;
1229        return 1;
1230}
1231
1232/*
1233 * Decide whether a segment is worth dumping; default is yes to be
1234 * sure (missing info is worse than too much; etc).
1235 * Personally I'd include everything, and use the coredump limit...
1236 *
1237 * I think we should skip something. But I am not sure how. H.J.
1238 */
1239static int maydump(struct vm_area_struct *vma)
1240{
1241        /* The vma can be set up to tell us the answer directly.  */
1242        if (vma->vm_flags & VM_ALWAYSDUMP)
1243                return 1;
1244
1245        /* Do not dump I/O mapped devices or special mappings */
1246        if (vma->vm_flags & (VM_IO | VM_RESERVED))
1247                return 0;
1248
1249        /* Dump shared memory only if mapped from an anonymous file. */
1250        if (vma->vm_flags & VM_SHARED)
1251                return vma->vm_file->f_dentry->d_inode->i_nlink == 0;
1252
1253        /* If it hasn't been written to, don't write it out */
1254        if (!vma->anon_vma)
1255                return 0;
1256
1257        return 1;
1258}
1259
/*
 * In-memory description of one ELF note: everything needed to emit the
 * note header, its name and its payload.
 */
struct memelfnote {
	const char *name;	/* NUL-terminated note name */
	int type;		/* note type code */
	unsigned int datasz;	/* payload size in bytes */
	void *data;		/* payload */
};
1268
1269static int notesize(struct memelfnote *en)
1270{
1271        int sz;
1272
1273        sz = sizeof(struct elf_note);
1274        sz += roundup(strlen(en->name) + 1, 4);
1275        sz += roundup(en->datasz, 4);
1276
1277        return sz;
1278}
1279
1280#define DUMP_WRITE(addr, nr)    \
1281        do { if (!dump_write(file, (addr), (nr))) return 0; } while(0)
1282#define DUMP_SEEK(off)  \
1283        do { if (!dump_seek(file, (off))) return 0; } while(0)
1284
1285static int writenote(struct memelfnote *men, struct file *file)
1286{
1287        struct elf_note en;
1288
1289        en.n_namesz = strlen(men->name) + 1;
1290        en.n_descsz = men->datasz;
1291        en.n_type = men->type;
1292
1293        DUMP_WRITE(&en, sizeof(en));
1294        DUMP_WRITE(men->name, en.n_namesz);
1295        /* XXX - cast from long long to long to avoid need for libgcc.a */
1296        DUMP_SEEK(roundup((unsigned long)file->f_pos, 4));      /* XXX */
1297        DUMP_WRITE(men->data, men->datasz);
1298        DUMP_SEEK(roundup((unsigned long)file->f_pos, 4));      /* XXX */
1299
1300        return 1;
1301}
1302#undef DUMP_WRITE
1303#undef DUMP_SEEK
1304
/*
 * Write/seek helpers for the core dumper.  Both expect 'file' and an
 * 'end_coredump' label in the calling scope; DUMP_WRITE additionally
 * charges nr bytes against 'size' and bails out once 'limit' is
 * exceeded.  Note that nr is evaluated twice.
 *
 * Each macro is wrapped in do { } while (0) so it expands to exactly one
 * statement and stays safe inside an unbraced if/else.
 */
#define DUMP_WRITE(addr, nr)	\
	do { \
		if ((size += (nr)) > limit || \
		    !dump_write(file, (addr), (nr))) \
			goto end_coredump; \
	} while (0)
#define DUMP_SEEK(off)	\
	do { \
		if (!dump_seek(file, (off))) \
			goto end_coredump; \
	} while (0)
1311
1312static void fill_elf_header(struct elfhdr *elf, int segs)
1313{
1314        memcpy(elf->e_ident, ELFMAG, SELFMAG);
1315        elf->e_ident[EI_CLASS] = ELF_CLASS;
1316        elf->e_ident[EI_DATA] = ELF_DATA;
1317        elf->e_ident[EI_VERSION] = EV_CURRENT;
1318        elf->e_ident[EI_OSABI] = ELF_OSABI;
1319        memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
1320
1321        elf->e_type = ET_CORE;
1322        elf->e_machine = ELF_ARCH;
1323        elf->e_version = EV_CURRENT;
1324        elf->e_entry = 0;
1325        elf->e_phoff = sizeof(struct elfhdr);
1326        elf->e_shoff = 0;
1327        elf->e_flags = ELF_CORE_EFLAGS;
1328        elf->e_ehsize = sizeof(struct elfhdr);
1329        elf->e_phentsize = sizeof(struct elf_phdr);
1330        elf->e_phnum = segs;
1331        elf->e_shentsize = 0;
1332        elf->e_shnum = 0;
1333        elf->e_shstrndx = 0;
1334        return;
1335}
1336
1337static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, off_t offset)
1338{
1339        phdr->p_type = PT_NOTE;
1340        phdr->p_offset = offset;
1341        phdr->p_vaddr = 0;
1342        phdr->p_paddr = 0;
1343        phdr->p_filesz = sz;
1344        phdr->p_memsz = 0;
1345        phdr->p_flags = 0;
1346        phdr->p_align = 0;
1347        return;
1348}
1349
1350static void fill_note(struct memelfnote *note, const char *name, int type, 
1351                unsigned int sz, void *data)
1352{
1353        note->name = name;
1354        note->type = type;
1355        note->datasz = sz;
1356        note->data = data;
1357        return;
1358}
1359
1360/*
1361 * fill up all the fields in prstatus from the given task struct, except
1362 * registers which need to be filled up separately.
1363 */
1364static void fill_prstatus(struct elf_prstatus *prstatus,
1365                struct task_struct *p, long signr)
1366{
1367        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1368        prstatus->pr_sigpend = p->pending.signal.sig[0];
1369        prstatus->pr_sighold = p->blocked.sig[0];
1370        prstatus->pr_pid = p->pid;
1371        prstatus->pr_ppid = p->parent->pid;
1372        prstatus->pr_pgrp = process_group(p);
1373        prstatus->pr_sid = p->signal->session;
1374        if (thread_group_leader(p)) {
1375                /*
1376                 * This is the record for the group leader.  Add in the
1377                 * cumulative times of previous dead threads.  This total
1378                 * won't include the time of each live thread whose state
1379                 * is included in the core dump.  The final total reported
1380                 * to our parent process when it calls wait4 will include
1381                 * those sums as well as the little bit more time it takes
1382                 * this and each other thread to finish dying after the
1383                 * core dump synchronization phase.
1384                 */
1385                cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
1386                                   &prstatus->pr_utime);
1387                cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
1388                                   &prstatus->pr_stime);
1389        } else {
1390                cputime_to_timeval(p->utime, &prstatus->pr_utime);
1391                cputime_to_timeval(p->stime, &prstatus->pr_stime);
1392        }
1393        cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1394        cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1395}
1396
1397static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1398                       struct mm_struct *mm)
1399{
1400        unsigned int i, len;
1401        
1402        /* first copy the parameters from user space */
1403        memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1404
1405        len = mm->arg_end - mm->arg_start;
1406        if (len >= ELF_PRARGSZ)
1407                len = ELF_PRARGSZ-1;
1408        if (copy_from_user(&psinfo->pr_psargs,
1409                           (const char __user *)mm->arg_start, len))
1410                return -EFAULT;
1411        for(i = 0; i < len; i++)
1412                if (psinfo->pr_psargs[i] == 0)
1413                        psinfo->pr_psargs[i] = ' ';
1414        psinfo->pr_psargs[len] = 0;
1415
1416        psinfo->pr_pid = p->pid;
1417        psinfo->pr_ppid = p->parent->pid;
1418        psinfo->pr_pgrp = process_group(p);
1419        psinfo->pr_sid = p->signal->session;
1420
1421        i = p->state ? ffz(~p->state) + 1 : 0;
1422        psinfo->pr_state = i;
1423        psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1424        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1425        psinfo->pr_nice = task_nice(p);
1426        psinfo->pr_flag = p->flags;
1427        SET_UID(psinfo->pr_uid, p->uid);
1428        SET_GID(psinfo->pr_gid, p->gid);
1429        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1430        
1431        return 0;
1432}
1433
1434/* Here is the structure in which status of each thread is captured. */
1435struct elf_thread_status
1436{
1437        struct list_head list;
1438        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1439        elf_fpregset_t fpu;             /* NT_PRFPREG */
1440        struct task_struct *thread;
1441#ifdef ELF_CORE_COPY_XFPREGS
1442        elf_fpxregset_t xfpu;           /* NT_PRXFPREG */
1443#endif
1444        struct memelfnote notes[3];
1445        int num_notes;
1446};
1447
1448/*
1449 * In order to add the specific thread information for the elf file format,
1450 * we need to keep a linked list of every threads pr_status and then create
1451 * a single section for them in the final core file.
1452 */
1453static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1454{
1455        int sz = 0;
1456        struct task_struct *p = t->thread;
1457        t->num_notes = 0;
1458
1459        fill_prstatus(&t->prstatus, p, signr);
1460        elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1461        
1462        fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1463                  &(t->prstatus));
1464        t->num_notes++;
1465        sz += notesize(&t->notes[0]);
1466
1467        if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1468                                                                &t->fpu))) {
1469                fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1470                          &(t->fpu));
1471                t->num_notes++;
1472                sz += notesize(&t->notes[1]);
1473        }
1474
1475#ifdef ELF_CORE_COPY_XFPREGS
1476        if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1477                fill_note(&t->notes[2], "LINUX", NT_PRXFPREG, sizeof(t->xfpu),
1478                          &t->xfpu);
1479                t->num_notes++;
1480                sz += notesize(&t->notes[2]);
1481        }
1482#endif  
1483        return sz;
1484}
1485
1486/*
1487 * Actual dumper
1488 *
1489 * This is a two-pass process; first we find the offsets of the bits,
1490 * and then they are actually written out.  If we run out of core limit
1491 * we just truncate.
1492 */
1493static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
1494{
1495#define NUM_NOTES       6
1496        int has_dumped = 0;
1497        mm_segment_t fs;
1498        int segs;
1499        size_t size = 0;
1500        int i;
1501        struct vm_area_struct *vma;
1502        struct elfhdr *elf = NULL;
1503        off_t offset = 0, dataoff;
1504        unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
1505        int numnote;
1506        struct memelfnote *notes = NULL;
1507        struct elf_prstatus *prstatus = NULL;   /* NT_PRSTATUS */
1508        struct elf_prpsinfo *psinfo = NULL;     /* NT_PRPSINFO */
1509        struct task_struct *g, *p;
1510        LIST_HEAD(thread_list);
1511        struct list_head *t;
1512        elf_fpregset_t *fpu = NULL;
1513#ifdef ELF_CORE_COPY_XFPREGS
1514        elf_fpxregset_t *xfpu = NULL;
1515#endif
1516        int thread_status_size = 0;
1517        elf_addr_t *auxv;
1518
1519        /*
1520         * We no longer stop all VM operations.
1521         * 
1522         * This is because those proceses that could possibly change map_count
1523         * or the mmap / vma pages are now blocked in do_exit on current
1524         * finishing this core dump.
1525         *
1526         * Only ptrace can touch these memory addresses, but it doesn't change
1527         * the map_count or the pages allocated. So no possibility of crashing
1528         * exists while dumping the mm->vm_next areas to the core file.
1529         */
1530  
1531        /* alloc memory for large data structures: too large to be on stack */
1532        elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1533        if (!elf)
1534                goto cleanup;
1535        prstatus = kmalloc(sizeof(*prstatus), GFP_KERNEL);
1536        if (!prstatus)
1537                goto cleanup;
1538        psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1539        if (!psinfo)
1540                goto cleanup;
1541        notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), GFP_KERNEL);
1542        if (!notes)
1543                goto cleanup;
1544        fpu = kmalloc(sizeof(*fpu), GFP_KERNEL);
1545        if (!fpu)
1546                goto cleanup;
1547#ifdef ELF_CORE_COPY_XFPREGS
1548        xfpu = kmalloc(sizeof(*xfpu), GFP_KERNEL);
1549        if (!xfpu)
1550                goto cleanup;
1551#endif
1552
1553        if (signr) {
1554                struct elf_thread_status *tmp;
1555                read_lock(&tasklist_lock);
1556                do_each_thread(g,p)
1557                        if (current->mm == p->mm && current != p) {
1558                                tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
1559                                if (!tmp) {
1560                                        read_unlock(&tasklist_lock);
1561                                        goto cleanup;
1562                                }
1563                                INIT_LIST_HEAD(&tmp->list);
1564                                tmp->thread = p;
1565                                list_add(&tmp->list, &thread_list);
1566                        }
1567                while_each_thread(g,p);
1568                read_unlock(&tasklist_lock);
1569                list_for_each(t, &thread_list) {
1570                        struct elf_thread_status *tmp;
1571                        int sz;
1572
1573                        tmp = list_entry(t, struct elf_thread_status, list);
1574                        sz = elf_dump_thread_status(signr, tmp);
1575                        thread_status_size += sz;
1576                }
1577        }
1578        /* now collect the dump for the current */
1579        memset(prstatus, 0, sizeof(*prstatus));
1580        fill_prstatus(prstatus, current, signr);
1581        elf_core_copy_regs(&prstatus->pr_reg, regs);
1582        
1583        segs = current->mm->map_count;
1584#ifdef ELF_CORE_EXTRA_PHDRS
1585        segs += ELF_CORE_EXTRA_PHDRS;
1586#endif
1587
1588        /* Set up header */
1589        fill_elf_header(elf, segs + 1); /* including notes section */
1590
1591        has_dumped = 1;
1592        current->flags |= PF_DUMPCORE;
1593
1594        /*
1595         * Set up the notes in similar form to SVR4 core dumps made
1596         * with info from their /proc.
1597         */
1598
1599        fill_note(notes + 0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus);
1600        fill_psinfo(psinfo, current->group_leader, current->mm);
1601        fill_note(notes + 1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1602        
1603        numnote = 2;
1604
1605        auxv = (elf_addr_t *)current->mm->saved_auxv;
1606
1607        i = 0;
1608        do
1609                i += 2;
1610        while (auxv[i - 2] != AT_NULL);
1611        fill_note(&notes[numnote++], "CORE", NT_AUXV,
1612                  i * sizeof(elf_addr_t), auxv);
1613
1614        /* Try to dump the FPU. */
1615        if ((prstatus->pr_fpvalid =
1616             elf_core_copy_task_fpregs(current, regs, fpu)))
1617                fill_note(notes + numnote++,
1618                          "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
1619#ifdef ELF_CORE_COPY_XFPREGS
1620        if (elf_core_copy_task_xfpregs(current, xfpu))
1621                fill_note(notes + numnote++,
1622                          "LINUX", NT_PRXFPREG, sizeof(*xfpu), xfpu);
1623#endif  
1624  
1625        fs = get_fs();
1626        set_fs(KERNEL_DS);
1627
1628        DUMP_WRITE(elf, sizeof(*elf));
1629        offset += sizeof(*elf);                         /* Elf header */
1630        offset += (segs+1) * sizeof(struct elf_phdr);   /* Program headers */
1631
1632        /* Write notes phdr entry */
1633        {
1634                struct elf_phdr phdr;
1635                int sz = 0;
1636
1637                for (i = 0; i < numnote; i++)
1638                        sz += notesize(notes + i);
1639                
1640                sz += thread_status_size;
1641
1642                fill_elf_note_phdr(&phdr, sz, offset);
1643                offset += sz;
1644                DUMP_WRITE(&phdr, sizeof(phdr));
1645        }
1646
1647        /* Page-align dumped data */
1648        dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1649
1650        /* Write program headers for segments dump */
1651        for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1652                struct elf_phdr phdr;
1653                size_t sz;
1654
1655                sz = vma->vm_end - vma->vm_start;
1656
1657                phdr.p_type = PT_LOAD;
1658                phdr.p_offset = offset;
1659                phdr.p_vaddr = vma->vm_start;
1660                phdr.p_paddr = 0;
1661                phdr.p_filesz = maydump(vma) ? sz : 0;
1662                phdr.p_memsz = sz;
1663                offset += phdr.p_filesz;
1664                phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
1665                if (vma->vm_flags & VM_WRITE)
1666                        phdr.p_flags |= PF_W;
1667                if (vma->vm_flags & VM_EXEC)
1668                        phdr.p_flags |= PF_X;
1669                phdr.p_align = ELF_EXEC_PAGESIZE;
1670
1671                DUMP_WRITE(&phdr, sizeof(phdr));
1672        }
1673
1674#ifdef ELF_CORE_WRITE_EXTRA_PHDRS
1675        ELF_CORE_WRITE_EXTRA_PHDRS;
1676#endif
1677
1678        /* write out the notes section */
1679        for (i = 0; i < numnote; i++)
1680                if (!writenote(notes + i, file))
1681                        goto end_coredump;
1682
1683        /* write out the thread status notes section */
1684        list_for_each(t, &thread_list) {
1685                struct elf_thread_status *tmp =
1686                                list_entry(t, struct elf_thread_status, list);
1687
1688                for (i = 0; i < tmp->num_notes; i++)
1689                        if (!writenote(&tmp->notes[i], file))
1690                                goto end_coredump;
1691        }
1692 
1693        DUMP_SEEK(dataoff);
1694
1695        for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1696                unsigned long addr;
1697
1698                if (!maydump(vma))
1699                        continue;
1700
1701                for (addr = vma->vm_start;
1702                     addr < vma->vm_end;
1703                     addr += PAGE_SIZE) {
1704                        struct page *page;
1705                        struct vm_area_struct *vma;
1706
1707                        if (get_user_pages(current, current->mm, addr, 1, 0, 1,
1708                                                &page, &vma) <= 0) {
1709                                DUMP_SEEK(file->f_pos + PAGE_SIZE);
1710                        } else {
1711                                if (page == ZERO_PAGE(addr)) {
1712                                        DUMP_SEEK(file->f_pos + PAGE_SIZE);
1713                                } else {
1714                                        void *kaddr;
1715                                        flush_cache_page(vma, addr,
1716                                                         page_to_pfn(page));
1717                                        kaddr = kmap(page);
1718                                        if ((size += PAGE_SIZE) > limit ||
1719                                            !dump_write(file, kaddr,
1720                                            PAGE_SIZE)) {
1721                                                kunmap(page);
1722                                                page_cache_release(page);
1723                                                goto end_coredump;
1724                                        }
1725                                        kunmap(page);
1726                                }
1727                                page_cache_release(page);
1728                        }
1729                }
1730        }
1731
1732#ifdef ELF_CORE_WRITE_EXTRA_DATA
1733        ELF_CORE_WRITE_EXTRA_DATA;
1734#endif
1735
1736        if ((off_t)file->f_pos != offset) {
1737                /* Sanity check */
1738                printk(KERN_WARNING
1739                       "elf_core_dump: file->f_pos (%ld) != offset (%ld)\n",
1740                       (off_t)file->f_pos, offset);
1741        }
1742
1743end_coredump:
1744        set_fs(fs);
1745
1746cleanup:
1747        while (!list_empty(&thread_list)) {
1748                struct list_head *tmp = thread_list.next;
1749                list_del(tmp);
1750                kfree(list_entry(tmp, struct elf_thread_status, list));
1751        }
1752
1753        kfree(elf);
1754        kfree(prstatus);
1755        kfree(psinfo);
1756        kfree(notes);
1757        kfree(fpu);
1758#ifdef ELF_CORE_COPY_XFPREGS
1759        kfree(xfpu);
1760#endif
1761        return has_dumped;
1762#undef NUM_NOTES
1763}
1764
1765#endif          /* USE_ELF_CORE_DUMP */
1766
1767static int __init init_elf_binfmt(void)
1768{
1769        return register_binfmt(&elf_format);
1770}
1771
1772static void __exit exit_elf_binfmt(void)
1773{
1774        /* Remove the COFF and ELF loaders. */
1775        unregister_binfmt(&elf_format);
1776}
1777
/* Hook init_elf_binfmt() in through core_initcall(). */
1778core_initcall(init_elf_binfmt);
/* Name exit_elf_binfmt() as the handler given to module_exit(). */
1779module_exit(exit_elf_binfmt);
/* Declare the license string for this code as "GPL". */
1780MODULE_LICENSE("GPL");
1781