RHEL4/kernel/sys.c
<<
>>
Prefs
   1/*
   2 *  linux/kernel/sys.c
   3 *
   4 *  Copyright (C) 1991, 1992  Linus Torvalds
   5 */
   6
   7#include <linux/config.h>
   8#include <linux/compat.h>
   9#include <linux/module.h>
  10#include <linux/mm.h>
  11#include <linux/utsname.h>
  12#include <linux/mman.h>
  13#include <linux/smp_lock.h>
  14#include <linux/notifier.h>
  15#include <linux/reboot.h>
  16#include <linux/prctl.h>
  17#include <linux/init.h>
  18#include <linux/highuid.h>
  19#include <linux/fs.h>
  20#include <linux/workqueue.h>
  21#include <linux/device.h>
  22#include <linux/key.h>
  23#include <linux/times.h>
  24#include <linux/security.h>
  25#include <linux/dcookies.h>
  26#include <linux/suspend.h>
  27#include <linux/tty.h>
  28#include <linux/task_io_accounting_ops.h>
  29
  30#include <asm/uaccess.h>
  31#include <asm/io.h>
  32#include <asm/unistd.h>
  33#include <linux/getcpu.h>
  34
  35#ifndef SET_UNALIGN_CTL
  36# define SET_UNALIGN_CTL(a,b)   (-EINVAL)
  37#endif
  38#ifndef GET_UNALIGN_CTL
  39# define GET_UNALIGN_CTL(a,b)   (-EINVAL)
  40#endif
  41#ifndef SET_FPEMU_CTL
  42# define SET_FPEMU_CTL(a,b)     (-EINVAL)
  43#endif
  44#ifndef GET_FPEMU_CTL
  45# define GET_FPEMU_CTL(a,b)     (-EINVAL)
  46#endif
  47#ifndef SET_FPEXC_CTL
  48# define SET_FPEXC_CTL(a,b)     (-EINVAL)
  49#endif
  50#ifndef GET_FPEXC_CTL
  51# define GET_FPEXC_CTL(a,b)     (-EINVAL)
  52#endif
  53#ifndef GET_ENDIAN
  54# define GET_ENDIAN(a,b)        (-EINVAL)
  55#endif
  56#ifndef SET_ENDIAN
  57# define SET_ENDIAN(a,b)        (-EINVAL)
  58#endif
  59
  60/*
  61 * this is where the system-wide overflow UID and GID are defined, for
  62 * architectures that now have 32-bit UID/GID but didn't in the past
  63 */
  64
  65int overflowuid = DEFAULT_OVERFLOWUID;
  66int overflowgid = DEFAULT_OVERFLOWGID;
  67
  68#ifdef CONFIG_UID16
  69EXPORT_SYMBOL(overflowuid);
  70EXPORT_SYMBOL(overflowgid);
  71#endif
  72
  73/*
  74 * the same as above, but for filesystems which can only store a 16-bit
  75 * UID and GID. as such, this is needed on all architectures
  76 */
  77
  78int fs_overflowuid = DEFAULT_FS_OVERFLOWUID;
  79int fs_overflowgid = DEFAULT_FS_OVERFLOWUID;
  80
  81EXPORT_SYMBOL(fs_overflowuid);
  82EXPORT_SYMBOL(fs_overflowgid);
  83
  84/*
  85 * this indicates whether you can reboot with ctrl-alt-del: the default is yes
  86 */
  87
  88int C_A_D = 1;
  89int cad_pid = 1;
  90
  91/*
  92 *      Notifier list for kernel code which wants to be called
  93 *      at shutdown. This is used to stop any idling DMA operations
  94 *      and the like. 
  95 */
  96
  97static struct notifier_block *reboot_notifier_list;
  98rwlock_t notifier_lock = RW_LOCK_UNLOCKED;
  99
 100/**
 101 *      notifier_chain_register - Add notifier to a notifier chain
 102 *      @list: Pointer to root list pointer
 103 *      @n: New entry in notifier chain
 104 *
 105 *      Adds a notifier to a notifier chain.
 106 *
 107 *      Currently always returns zero.
 108 */
 109 
 110int notifier_chain_register(struct notifier_block **list, struct notifier_block *n)
 111{
 112        write_lock(&notifier_lock);
 113        while(*list)
 114        {
 115                if(n->priority > (*list)->priority)
 116                        break;
 117                list= &((*list)->next);
 118        }
 119        n->next = *list;
 120        *list=n;
 121        write_unlock(&notifier_lock);
 122        return 0;
 123}
 124
 125EXPORT_SYMBOL(notifier_chain_register);
 126
 127/**
 128 *      notifier_chain_unregister - Remove notifier from a notifier chain
 129 *      @nl: Pointer to root list pointer
 130 *      @n: New entry in notifier chain
 131 *
 132 *      Removes a notifier from a notifier chain.
 133 *
 134 *      Returns zero on success, or %-ENOENT on failure.
 135 */
 136 
 137int notifier_chain_unregister(struct notifier_block **nl, struct notifier_block *n)
 138{
 139        write_lock(&notifier_lock);
 140        while((*nl)!=NULL)
 141        {
 142                if((*nl)==n)
 143                {
 144                        *nl=n->next;
 145                        write_unlock(&notifier_lock);
 146                        return 0;
 147                }
 148                nl=&((*nl)->next);
 149        }
 150        write_unlock(&notifier_lock);
 151        return -ENOENT;
 152}
 153
 154EXPORT_SYMBOL(notifier_chain_unregister);
 155
 156/**
 157 *      notifier_call_chain - Call functions in a notifier chain
 158 *      @n: Pointer to root pointer of notifier chain
 159 *      @val: Value passed unmodified to notifier function
 160 *      @v: Pointer passed unmodified to notifier function
 161 *
 162 *      Calls each function in a notifier chain in turn.
 163 *
 164 *      If the return value of the notifier can be and'd
 165 *      with %NOTIFY_STOP_MASK, then notifier_call_chain
 166 *      will return immediately, with the return value of
 167 *      the notifier function which halted execution.
 168 *      Otherwise, the return value is the return value
 169 *      of the last notifier function called.
 170 */
 171 
 172int notifier_call_chain(struct notifier_block **n, unsigned long val, void *v)
 173{
 174        int ret=NOTIFY_DONE;
 175        struct notifier_block *nb = *n;
 176
 177        while(nb)
 178        {
 179                ret=nb->notifier_call(nb,val,v);
 180                if(ret&NOTIFY_STOP_MASK)
 181                {
 182                        return ret;
 183                }
 184                nb=nb->next;
 185        }
 186        return ret;
 187}
 188
 189EXPORT_SYMBOL(notifier_call_chain);
 190
 191/**
 192 *      register_reboot_notifier - Register function to be called at reboot time
 193 *      @nb: Info about notifier function to be called
 194 *
 195 *      Registers a function with the list of functions
 196 *      to be called at reboot time.
 197 *
 198 *      Currently always returns zero, as notifier_chain_register
 199 *      always returns zero.
 200 */
 201 
 202int register_reboot_notifier(struct notifier_block * nb)
 203{
 204        return notifier_chain_register(&reboot_notifier_list, nb);
 205}
 206
 207EXPORT_SYMBOL(register_reboot_notifier);
 208
 209/**
 210 *      unregister_reboot_notifier - Unregister previously registered reboot notifier
 211 *      @nb: Hook to be unregistered
 212 *
 213 *      Unregisters a previously registered reboot
 214 *      notifier function.
 215 *
 216 *      Returns zero on success, or %-ENOENT on failure.
 217 */
 218 
 219int unregister_reboot_notifier(struct notifier_block * nb)
 220{
 221        return notifier_chain_unregister(&reboot_notifier_list, nb);
 222}
 223
 224EXPORT_SYMBOL(unregister_reboot_notifier);
 225
 226asmlinkage long sys_ni_syscall(void)
 227{
 228        return -ENOSYS;
 229}
 230
 231cond_syscall(sys_nfsservctl)
 232cond_syscall(sys_quotactl)
 233cond_syscall(sys_acct)
 234cond_syscall(sys_lookup_dcookie)
 235cond_syscall(sys_swapon)
 236cond_syscall(sys_swapoff)
 237cond_syscall(sys_init_module)
 238cond_syscall(sys_delete_module)
 239cond_syscall(sys_socketpair)
 240cond_syscall(sys_bind)
 241cond_syscall(sys_listen)
 242cond_syscall(sys_accept)
 243cond_syscall(sys_connect)
 244cond_syscall(sys_getsockname)
 245cond_syscall(sys_getpeername)
 246cond_syscall(sys_sendto)
 247cond_syscall(sys_send)
 248cond_syscall(sys_recvfrom)
 249cond_syscall(sys_recv)
 250cond_syscall(sys_socket)
 251cond_syscall(sys_setsockopt)
 252cond_syscall(sys_getsockopt)
 253cond_syscall(sys_shutdown)
 254cond_syscall(sys_sendmsg)
 255cond_syscall(sys_recvmsg)
 256cond_syscall(sys_socketcall)
 257cond_syscall(sys_futex)
 258cond_syscall(compat_sys_futex)
 259cond_syscall(sys_epoll_create)
 260cond_syscall(sys_epoll_ctl)
 261cond_syscall(sys_epoll_wait)
 262cond_syscall(sys_semget)
 263cond_syscall(sys_semop)
 264cond_syscall(sys_semtimedop)
 265cond_syscall(sys_semctl)
 266cond_syscall(sys_msgget)
 267cond_syscall(sys_msgsnd)
 268cond_syscall(sys_msgrcv)
 269cond_syscall(sys_msgctl)
 270cond_syscall(sys_shmget)
 271cond_syscall(sys_shmdt)
 272cond_syscall(sys_shmctl)
 273cond_syscall(sys_mq_open)
 274cond_syscall(sys_mq_unlink)
 275cond_syscall(sys_mq_timedsend)
 276cond_syscall(sys_mq_timedreceive)
 277cond_syscall(sys_mq_notify)
 278cond_syscall(sys_mq_getsetattr)
 279cond_syscall(compat_sys_mq_open)
 280cond_syscall(compat_sys_mq_timedsend)
 281cond_syscall(compat_sys_mq_timedreceive)
 282cond_syscall(compat_sys_mq_notify)
 283cond_syscall(compat_sys_mq_getsetattr)
 284cond_syscall(sys_mbind)
 285cond_syscall(sys_get_mempolicy)
 286cond_syscall(sys_set_mempolicy)
 287cond_syscall(compat_mbind)
 288cond_syscall(compat_get_mempolicy)
 289cond_syscall(compat_set_mempolicy)
 290cond_syscall(sys_add_key);
 291cond_syscall(sys_request_key);
 292cond_syscall(sys_keyctl);
 293cond_syscall(compat_sys_keyctl);
 294
 295/* arch-specific weak syscall entries */
 296cond_syscall(sys_pciconfig_read)
 297cond_syscall(sys_pciconfig_write)
 298cond_syscall(sys_pciconfig_iobase)
 299
 300static int set_one_prio(struct task_struct *p, int niceval, int error)
 301{
 302        int no_nice;
 303
 304        if (p->uid != current->euid &&
 305                p->uid != current->uid && !capable(CAP_SYS_NICE)) {
 306                error = -EPERM;
 307                goto out;
 308        }
 309        if (niceval < task_nice(p) && !capable(CAP_SYS_NICE)) {
 310                error = -EACCES;
 311                goto out;
 312        }
 313        no_nice = security_task_setnice(p, niceval);
 314        if (no_nice) {
 315                error = no_nice;
 316                goto out;
 317        }
 318        if (error == -ESRCH)
 319                error = 0;
 320        set_user_nice(p, niceval);
 321out:
 322        return error;
 323}
 324
 325asmlinkage long sys_setpriority(int which, int who, int niceval)
 326{
 327        struct task_struct *g, *p;
 328        struct user_struct *user;
 329        int error = -EINVAL;
 330
 331        if (which > 2 || which < 0)
 332                goto out;
 333
 334        /* normalize: avoid signed division (rounding problems) */
 335        error = -ESRCH;
 336        if (niceval < -20)
 337                niceval = -20;
 338        if (niceval > 19)
 339                niceval = 19;
 340
 341        read_lock(&tasklist_lock);
 342        switch (which) {
 343                case PRIO_PROCESS:
 344                        if (!who)
 345                                who = current->pid;
 346                        p = find_task_by_pid(who);
 347                        if (p)
 348                                error = set_one_prio(p, niceval, error);
 349                        break;
 350                case PRIO_PGRP:
 351                        if (!who)
 352                                who = process_group(current);
 353                        do_each_task_pid(who, PIDTYPE_PGID, p) {
 354                                error = set_one_prio(p, niceval, error);
 355                        } while_each_task_pid(who, PIDTYPE_PGID, p);
 356                        break;
 357                case PRIO_USER:
 358                        user = current->user;
 359                        if (!who)
 360                                who = current->uid;
 361                        else
 362                                if ((who != current->uid) && !(user = find_user(who)))
 363                                        goto out_unlock;        /* No processes for this user */
 364
 365                        do_each_thread(g, p)
 366                                if (p->uid == who)
 367                                        error = set_one_prio(p, niceval, error);
 368                        while_each_thread(g, p);
 369                        if (who != current->uid)
 370                                free_uid(user);         /* For find_user() */
 371                        break;
 372        }
 373out_unlock:
 374        read_unlock(&tasklist_lock);
 375out:
 376        return error;
 377}
 378
 379/*
 380 * Ugh. To avoid negative return values, "getpriority()" will
 381 * not return the normal nice-value, but a negated value that
 382 * has been offset by 20 (ie it returns 40..1 instead of -20..19)
 383 * to stay compatible.
 384 */
 385asmlinkage long sys_getpriority(int which, int who)
 386{
 387        struct task_struct *g, *p;
 388        struct user_struct *user;
 389        long niceval, retval = -ESRCH;
 390
 391        if (which > 2 || which < 0)
 392                return -EINVAL;
 393
 394        read_lock(&tasklist_lock);
 395        switch (which) {
 396                case PRIO_PROCESS:
 397                        if (!who)
 398                                who = current->pid;
 399                        p = find_task_by_pid(who);
 400                        if (p) {
 401                                niceval = 20 - task_nice(p);
 402                                if (niceval > retval)
 403                                        retval = niceval;
 404                        }
 405                        break;
 406                case PRIO_PGRP:
 407                        if (!who)
 408                                who = process_group(current);
 409                        do_each_task_pid(who, PIDTYPE_PGID, p) {
 410                                niceval = 20 - task_nice(p);
 411                                if (niceval > retval)
 412                                        retval = niceval;
 413                        } while_each_task_pid(who, PIDTYPE_PGID, p);
 414                        break;
 415                case PRIO_USER:
 416                        user = current->user;
 417                        if (!who)
 418                                who = current->uid;
 419                        else
 420                                if ((who != current->uid) && !(user = find_user(who)))
 421                                        goto out_unlock;        /* No processes for this user */
 422
 423                        do_each_thread(g, p)
 424                                if (p->uid == who) {
 425                                        niceval = 20 - task_nice(p);
 426                                        if (niceval > retval)
 427                                                retval = niceval;
 428                                }
 429                        while_each_thread(g, p);
 430                        if (who != current->uid)
 431                                free_uid(user);         /* for find_user() */
 432                        break;
 433        }
 434out_unlock:
 435        read_unlock(&tasklist_lock);
 436
 437        return retval;
 438}
 439
 440
 441/*
 442 * Reboot system call: for obvious reasons only root may call it,
 443 * and even root needs to set up some magic numbers in the registers
 444 * so that some mistake won't make this reboot the whole machine.
 445 * You can also set the meaning of the ctrl-alt-del-key here.
 446 *
 447 * reboot doesn't sync: do that yourself before calling this.
 448 */
 449asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user * arg)
 450{
 451        char buffer[256];
 452
 453        /* We only trust the superuser with rebooting the system. */
 454        if (!capable(CAP_SYS_BOOT))
 455                return -EPERM;
 456
 457        /* For safety, we require "magic" arguments. */
 458        if (magic1 != LINUX_REBOOT_MAGIC1 ||
 459            (magic2 != LINUX_REBOOT_MAGIC2 &&
 460                        magic2 != LINUX_REBOOT_MAGIC2A &&
 461                        magic2 != LINUX_REBOOT_MAGIC2B &&
 462                        magic2 != LINUX_REBOOT_MAGIC2C))
 463                return -EINVAL;
 464
 465        lock_kernel();
 466        switch (cmd) {
 467        case LINUX_REBOOT_CMD_RESTART:
 468                notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL);
 469                system_state = SYSTEM_RESTART;
 470                device_shutdown();
 471                printk(KERN_EMERG "Restarting system.\n");
 472                machine_restart(NULL);
 473                break;
 474
 475        case LINUX_REBOOT_CMD_CAD_ON:
 476                C_A_D = 1;
 477                break;
 478
 479        case LINUX_REBOOT_CMD_CAD_OFF:
 480                C_A_D = 0;
 481                break;
 482
 483        case LINUX_REBOOT_CMD_HALT:
 484                notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL);
 485                system_state = SYSTEM_HALT;
 486                device_shutdown();
 487                printk(KERN_EMERG "System halted.\n");
 488                machine_halt();
 489                unlock_kernel();
 490                do_exit(0);
 491                break;
 492
 493        case LINUX_REBOOT_CMD_POWER_OFF:
 494                notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL);
 495                system_state = SYSTEM_POWER_OFF;
 496                device_shutdown();
 497                printk(KERN_EMERG "Power down.\n");
 498                machine_power_off();
 499                unlock_kernel();
 500                do_exit(0);
 501                break;
 502
 503        case LINUX_REBOOT_CMD_RESTART2:
 504                if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1) < 0) {
 505                        unlock_kernel();
 506                        return -EFAULT;
 507                }
 508                buffer[sizeof(buffer) - 1] = '\0';
 509
 510                notifier_call_chain(&reboot_notifier_list, SYS_RESTART, buffer);
 511                system_state = SYSTEM_RESTART;
 512                device_shutdown();
 513                printk(KERN_EMERG "Restarting system with command '%s'.\n", buffer);
 514                machine_restart(buffer);
 515                break;
 516
 517#ifdef CONFIG_SOFTWARE_SUSPEND
 518        case LINUX_REBOOT_CMD_SW_SUSPEND:
 519                {
 520                        int ret = software_suspend();
 521                        unlock_kernel();
 522                        return ret;
 523                }
 524#endif
 525
 526        default:
 527                unlock_kernel();
 528                return -EINVAL;
 529        }
 530        unlock_kernel();
 531        return 0;
 532}
 533
 534static void deferred_cad(void *dummy)
 535{
 536        notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL);
 537        machine_restart(NULL);
 538}
 539
 540/*
 541 * This function gets called by ctrl-alt-del - ie the keyboard interrupt.
 542 * As it's called within an interrupt, it may NOT sync: the only choice
 543 * is whether to reboot at once, or just ignore the ctrl-alt-del.
 544 */
 545void ctrl_alt_del(void)
 546{
 547        static DECLARE_WORK(cad_work, deferred_cad, NULL);
 548
 549        if (C_A_D)
 550                schedule_work(&cad_work);
 551        else
 552                kill_proc(cad_pid, SIGINT, 1);
 553}
 554        
 555
 556/*
 557 * Unprivileged users may change the real gid to the effective gid
 558 * or vice versa.  (BSD-style)
 559 *
 560 * If you set the real gid at all, or set the effective gid to a value not
 561 * equal to the real gid, then the saved gid is set to the new effective gid.
 562 *
 563 * This makes it possible for a setgid program to completely drop its
 564 * privileges, which is often a useful assertion to make when you are doing
 565 * a security audit over a program.
 566 *
 567 * The general idea is that a program which uses just setregid() will be
 568 * 100% compatible with BSD.  A program which uses just setgid() will be
 569 * 100% compatible with POSIX with saved IDs. 
 570 *
 571 * SMP: There are not races, the GIDs are checked only by filesystem
 572 *      operations (as far as semantic preservation is concerned).
 573 */
 574asmlinkage long sys_setregid(gid_t rgid, gid_t egid)
 575{
 576        int old_rgid = current->gid;
 577        int old_egid = current->egid;
 578        int new_rgid = old_rgid;
 579        int new_egid = old_egid;
 580        int retval;
 581
 582        retval = security_task_setgid(rgid, egid, (gid_t)-1, LSM_SETID_RE);
 583        if (retval)
 584                return retval;
 585
 586        if (rgid != (gid_t) -1) {
 587                if ((old_rgid == rgid) ||
 588                    (current->egid==rgid) ||
 589                    capable(CAP_SETGID))
 590                        new_rgid = rgid;
 591                else
 592                        return -EPERM;
 593        }
 594        if (egid != (gid_t) -1) {
 595                if ((old_rgid == egid) ||
 596                    (current->egid == egid) ||
 597                    (current->sgid == egid) ||
 598                    capable(CAP_SETGID))
 599                        new_egid = egid;
 600                else {
 601                        return -EPERM;
 602                }
 603        }
 604        if (new_egid != old_egid)
 605        {
 606                current->mm->dumpable = suid_dumpable;
 607                wmb();
 608        }
 609        if (rgid != (gid_t) -1 ||
 610            (egid != (gid_t) -1 && egid != old_rgid))
 611                current->sgid = new_egid;
 612        current->fsgid = new_egid;
 613        current->egid = new_egid;
 614        current->gid = new_rgid;
 615        key_fsgid_changed(current);
 616        return 0;
 617}
 618
 619/*
 620 * setgid() is implemented like SysV w/ SAVED_IDS 
 621 *
 622 * SMP: Same implicit races as above.
 623 */
 624asmlinkage long sys_setgid(gid_t gid)
 625{
 626        int old_egid = current->egid;
 627        int retval;
 628
 629        retval = security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_ID);
 630        if (retval)
 631                return retval;
 632
 633        if (capable(CAP_SETGID))
 634        {
 635                if(old_egid != gid)
 636                {
 637                        current->mm->dumpable = suid_dumpable;
 638                        wmb();
 639                }
 640                current->gid = current->egid = current->sgid = current->fsgid = gid;
 641        }
 642        else if ((gid == current->gid) || (gid == current->sgid))
 643        {
 644                if(old_egid != gid)
 645                {
 646                        current->mm->dumpable = suid_dumpable;
 647                        wmb();
 648                }
 649                current->egid = current->fsgid = gid;
 650        }
 651        else
 652                return -EPERM;
 653        key_fsgid_changed(current);
 654        return 0;
 655}
 656  
 657static int set_user(uid_t new_ruid, int dumpclear)
 658{
 659        struct user_struct *new_user;
 660
 661        new_user = alloc_uid(new_ruid);
 662        if (!new_user)
 663                return -EAGAIN;
 664
 665        if (atomic_read(&new_user->processes) >=
 666                                current->rlim[RLIMIT_NPROC].rlim_cur &&
 667                        new_user != &root_user) {
 668                free_uid(new_user);
 669                return -EAGAIN;
 670        }
 671
 672        switch_uid(new_user);
 673
 674        if(dumpclear)
 675        {
 676                current->mm->dumpable = suid_dumpable;
 677                wmb();
 678        }
 679        current->uid = new_ruid;
 680        return 0;
 681}
 682
 683/*
 684 * Unprivileged users may change the real uid to the effective uid
 685 * or vice versa.  (BSD-style)
 686 *
 687 * If you set the real uid at all, or set the effective uid to a value not
 688 * equal to the real uid, then the saved uid is set to the new effective uid.
 689 *
 690 * This makes it possible for a setuid program to completely drop its
 691 * privileges, which is often a useful assertion to make when you are doing
 692 * a security audit over a program.
 693 *
 694 * The general idea is that a program which uses just setreuid() will be
 695 * 100% compatible with BSD.  A program which uses just setuid() will be
 696 * 100% compatible with POSIX with saved IDs. 
 697 */
 698asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
 699{
 700        int old_ruid, old_euid, old_suid, new_ruid, new_euid;
 701        int retval;
 702
 703        retval = security_task_setuid(ruid, euid, (uid_t)-1, LSM_SETID_RE);
 704        if (retval)
 705                return retval;
 706
 707        new_ruid = old_ruid = current->uid;
 708        new_euid = old_euid = current->euid;
 709        old_suid = current->suid;
 710
 711        if (ruid != (uid_t) -1) {
 712                new_ruid = ruid;
 713                if ((old_ruid != ruid) &&
 714                    (current->euid != ruid) &&
 715                    !capable(CAP_SETUID))
 716                        return -EPERM;
 717        }
 718
 719        if (euid != (uid_t) -1) {
 720                new_euid = euid;
 721                if ((old_ruid != euid) &&
 722                    (current->euid != euid) &&
 723                    (current->suid != euid) &&
 724                    !capable(CAP_SETUID))
 725                        return -EPERM;
 726        }
 727
 728        if (new_ruid != old_ruid && set_user(new_ruid, new_euid != old_euid) < 0)
 729                return -EAGAIN;
 730
 731        if (new_euid != old_euid)
 732        {
 733                current->mm->dumpable = suid_dumpable;
 734                wmb();
 735        }
 736        current->fsuid = current->euid = new_euid;
 737        if (ruid != (uid_t) -1 ||
 738            (euid != (uid_t) -1 && euid != old_ruid))
 739                current->suid = current->euid;
 740        current->fsuid = current->euid;
 741
 742        key_fsuid_changed(current);
 743
 744        return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RE);
 745}
 746
 747
 748                
 749/*
 750 * setuid() is implemented like SysV with SAVED_IDS 
 751 * 
 752 * Note that SAVED_ID's is deficient in that a setuid root program
 753 * like sendmail, for example, cannot set its uid to be a normal 
 754 * user and then switch back, because if you're root, setuid() sets
 755 * the saved uid too.  If you don't like this, blame the bright people
 756 * in the POSIX committee and/or USG.  Note that the BSD-style setreuid()
 757 * will allow a root program to temporarily drop privileges and be able to
 758 * regain them by swapping the real and effective uid.  
 759 */
 760asmlinkage long sys_setuid(uid_t uid)
 761{
 762        int old_euid = current->euid;
 763        int old_ruid, old_suid, new_ruid, new_suid;
 764        int retval;
 765
 766        retval = security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_ID);
 767        if (retval)
 768                return retval;
 769
 770        old_ruid = new_ruid = current->uid;
 771        old_suid = current->suid;
 772        new_suid = old_suid;
 773        
 774        if (capable(CAP_SETUID)) {
 775                if (uid != old_ruid && set_user(uid, old_euid != uid) < 0)
 776                        return -EAGAIN;
 777                new_suid = uid;
 778        } else if ((uid != current->uid) && (uid != new_suid))
 779                return -EPERM;
 780
 781        if (old_euid != uid)
 782        {
 783                current->mm->dumpable = suid_dumpable;
 784                wmb();
 785        }
 786        current->fsuid = current->euid = uid;
 787        current->suid = new_suid;
 788
 789        key_fsuid_changed(current);
 790
 791        return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_ID);
 792}
 793
 794
 795/*
 796 * This function implements a generic ability to update ruid, euid,
 797 * and suid.  This allows you to implement the 4.4 compatible seteuid().
 798 */
 799asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
 800{
 801        int old_ruid = current->uid;
 802        int old_euid = current->euid;
 803        int old_suid = current->suid;
 804        int retval;
 805
 806        retval = security_task_setuid(ruid, euid, suid, LSM_SETID_RES);
 807        if (retval)
 808                return retval;
 809
 810        if (!capable(CAP_SETUID)) {
 811                if ((ruid != (uid_t) -1) && (ruid != current->uid) &&
 812                    (ruid != current->euid) && (ruid != current->suid))
 813                        return -EPERM;
 814                if ((euid != (uid_t) -1) && (euid != current->uid) &&
 815                    (euid != current->euid) && (euid != current->suid))
 816                        return -EPERM;
 817                if ((suid != (uid_t) -1) && (suid != current->uid) &&
 818                    (suid != current->euid) && (suid != current->suid))
 819                        return -EPERM;
 820        }
 821        if (ruid != (uid_t) -1) {
 822                if (ruid != current->uid && set_user(ruid, euid != current->euid) < 0)
 823                        return -EAGAIN;
 824        }
 825        if (euid != (uid_t) -1) {
 826                if (euid != current->euid)
 827                {
 828                        current->mm->dumpable = suid_dumpable;
 829                        wmb();
 830                }
 831                current->euid = euid;
 832        }
 833        current->fsuid = current->euid;
 834        if (suid != (uid_t) -1)
 835                current->suid = suid;
 836
 837        key_fsuid_changed(current);
 838
 839        return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RES);
 840}
 841
 842asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __user *suid)
 843{
 844        int retval;
 845
 846        if (!(retval = put_user(current->uid, ruid)) &&
 847            !(retval = put_user(current->euid, euid)))
 848                retval = put_user(current->suid, suid);
 849
 850        return retval;
 851}
 852
 853/*
 854 * Same as above, but for rgid, egid, sgid.
 855 */
 856asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
 857{
 858        int retval;
 859
 860        retval = security_task_setgid(rgid, egid, sgid, LSM_SETID_RES);
 861        if (retval)
 862                return retval;
 863
 864        if (!capable(CAP_SETGID)) {
 865                if ((rgid != (gid_t) -1) && (rgid != current->gid) &&
 866                    (rgid != current->egid) && (rgid != current->sgid))
 867                        return -EPERM;
 868                if ((egid != (gid_t) -1) && (egid != current->gid) &&
 869                    (egid != current->egid) && (egid != current->sgid))
 870                        return -EPERM;
 871                if ((sgid != (gid_t) -1) && (sgid != current->gid) &&
 872                    (sgid != current->egid) && (sgid != current->sgid))
 873                        return -EPERM;
 874        }
 875        if (egid != (gid_t) -1) {
 876                if (egid != current->egid)
 877                {
 878                        current->mm->dumpable = suid_dumpable;
 879                        wmb();
 880                }
 881                current->egid = egid;
 882        }
 883        current->fsgid = current->egid;
 884        if (rgid != (gid_t) -1)
 885                current->gid = rgid;
 886        if (sgid != (gid_t) -1)
 887                current->sgid = sgid;
 888
 889        key_fsgid_changed(current);
 890        return 0;
 891}
 892
 893asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __user *sgid)
 894{
 895        int retval;
 896
 897        if (!(retval = put_user(current->gid, rgid)) &&
 898            !(retval = put_user(current->egid, egid)))
 899                retval = put_user(current->sgid, sgid);
 900
 901        return retval;
 902}
 903
 904
 905/*
 906 * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This
 907 * is used for "access()" and for the NFS daemon (letting nfsd stay at
 908 * whatever uid it wants to). It normally shadows "euid", except when
 909 * explicitly set by setfsuid() or for access..
 910 */
 911asmlinkage long sys_setfsuid(uid_t uid)
 912{
 913        int old_fsuid;
 914
 915        old_fsuid = current->fsuid;
 916        if (security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS))
 917                return old_fsuid;
 918
 919        if (uid == current->uid || uid == current->euid ||
 920            uid == current->suid || uid == current->fsuid || 
 921            capable(CAP_SETUID))
 922        {
 923                if (uid != old_fsuid)
 924                {
 925                        current->mm->dumpable = suid_dumpable;
 926                        wmb();
 927                }
 928                current->fsuid = uid;
 929        }
 930
 931        key_fsuid_changed(current);
 932
 933        security_task_post_setuid(old_fsuid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS);
 934
 935        return old_fsuid;
 936}
 937
 938/*
 939 * Samma p\xE5 svenska..
 940 */
 941asmlinkage long sys_setfsgid(gid_t gid)
 942{
 943        int old_fsgid;
 944
 945        old_fsgid = current->fsgid;
 946        if (security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_FS))
 947                return old_fsgid;
 948
 949        if (gid == current->gid || gid == current->egid ||
 950            gid == current->sgid || gid == current->fsgid || 
 951            capable(CAP_SETGID))
 952        {
 953                if (gid != old_fsgid)
 954                {
 955                        current->mm->dumpable = suid_dumpable;
 956                        wmb();
 957                }
 958                current->fsgid = gid;
 959                key_fsgid_changed(current);
 960        }
 961        return old_fsgid;
 962}
 963
 964asmlinkage long sys_times(struct tms __user * tbuf)
 965{
 966        /*
 967         *      In the SMP world we might just be unlucky and have one of
 968         *      the times increment as we use it. Since the value is an
 969         *      atomically safe type this is just fine. Conceptually its
 970         *      as if the syscall took an instant longer to occur.
 971         */
 972        if (tbuf) {
 973                struct tms tmp;
 974                struct task_struct *tsk = current;
 975                struct task_struct *t;
 976                unsigned long utime, stime, cutime, cstime;
 977
 978                read_lock(&tasklist_lock);
 979                utime = tsk->signal->utime;
 980                stime = tsk->signal->stime;
 981                t = tsk;
 982                do {
 983                        utime += t->utime;
 984                        stime += t->stime;
 985                        t = next_thread(t);
 986                } while (t != tsk);
 987
 988                /*
 989                 * While we have tasklist_lock read-locked, no dying thread
 990                 * can be updating current->signal->[us]time.  Instead,
 991                 * we got their counts included in the live thread loop.
 992                 * However, another thread can come in right now and
 993                 * do a wait call that updates current->signal->c[us]time.
 994                 * To make sure we always see that pair updated atomically,
 995                 * we take the siglock around fetching them.
 996                 */
 997                spin_lock_irq(&tsk->sighand->siglock);
 998                cutime = tsk->signal->cutime;
 999                cstime = tsk->signal->cstime;
1000                spin_unlock_irq(&tsk->sighand->siglock);
1001                read_unlock(&tasklist_lock);
1002
1003                tmp.tms_utime = jiffies_to_clock_t(utime);
1004                tmp.tms_stime = jiffies_to_clock_t(stime);
1005                tmp.tms_cutime = jiffies_to_clock_t(cutime);
1006                tmp.tms_cstime = jiffies_to_clock_t(cstime);
1007                if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
1008                        return -EFAULT;
1009        }
1010        return (long) jiffies_64_to_clock_t(get_jiffies_64());
1011}
1012
1013/*
1014 * This needs some heavy checking ...
1015 * I just haven't the stomach for it. I also don't fully
1016 * understand sessions/pgrp etc. Let somebody who does explain it.
1017 *
1018 * OK, I think I have the protection semantics right.... this is really
1019 * only important on a multi-user system anyway, to make sure one user
1020 * can't send a signal to a process owned by another.  -TYT, 12/12/91
1021 *
1022 * Auch. Had to add the 'did_exec' flag to conform completely to POSIX.
1023 * LBT 04.03.94
1024 */
1025
1026asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
1027{
1028        struct task_struct *p;
1029        int err = -EINVAL;
1030
1031        if (!pid)
1032                pid = current->pid;
1033        if (!pgid)
1034                pgid = pid;
1035        if (pgid < 0)
1036                return -EINVAL;
1037
1038        /* From this point forward we keep holding onto the tasklist lock
1039         * so that our parent does not change from under us. -DaveM
1040         */
1041        write_lock_irq(&tasklist_lock);
1042
1043        err = -ESRCH;
1044        p = find_task_by_pid(pid);
1045        if (!p)
1046                goto out;
1047
1048        err = -EINVAL;
1049        if (!thread_group_leader(p))
1050                goto out;
1051
1052        if (p->parent == current || p->real_parent == current) {
1053                err = -EPERM;
1054                if (p->signal->session != current->signal->session)
1055                        goto out;
1056                err = -EACCES;
1057                if (p->did_exec)
1058                        goto out;
1059        } else {
1060                err = -ESRCH;
1061                if (p != current)
1062                        goto out;
1063        }
1064
1065        err = -EPERM;
1066        if (p->signal->leader)
1067                goto out;
1068
1069        if (pgid != pid) {
1070                struct task_struct *p;
1071
1072                do_each_task_pid(pgid, PIDTYPE_PGID, p) {
1073                        if (p->signal->session == current->signal->session)
1074                                goto ok_pgid;
1075                } while_each_task_pid(pgid, PIDTYPE_PGID, p);
1076                goto out;
1077        }
1078
1079ok_pgid:
1080        err = security_task_setpgid(p, pgid);
1081        if (err)
1082                goto out;
1083
1084        if (process_group(p) != pgid) {
1085                detach_pid(p, PIDTYPE_PGID);
1086                p->signal->pgrp = pgid;
1087                attach_pid(p, PIDTYPE_PGID, pgid);
1088        }
1089
1090        err = 0;
1091out:
1092        /* All paths lead to here, thus we are safe. -DaveM */
1093        write_unlock_irq(&tasklist_lock);
1094        return err;
1095}
1096
1097asmlinkage long sys_getpgid(pid_t pid)
1098{
1099        if (!pid) {
1100                return process_group(current);
1101        } else {
1102                int retval;
1103                struct task_struct *p;
1104
1105                read_lock(&tasklist_lock);
1106                p = find_task_by_pid(pid);
1107
1108                retval = -ESRCH;
1109                if (p) {
1110                        retval = security_task_getpgid(p);
1111                        if (!retval)
1112                                retval = process_group(p);
1113                }
1114                read_unlock(&tasklist_lock);
1115                return retval;
1116        }
1117}
1118
1119#ifdef __ARCH_WANT_SYS_GETPGRP
1120
1121asmlinkage long sys_getpgrp(void)
1122{
1123        /* SMP - assuming writes are word atomic this is fine */
1124        return process_group(current);
1125}
1126
1127#endif
1128
1129asmlinkage long sys_getsid(pid_t pid)
1130{
1131        if (!pid) {
1132                return current->signal->session;
1133        } else {
1134                int retval;
1135                struct task_struct *p;
1136
1137                read_lock(&tasklist_lock);
1138                p = find_task_by_pid(pid);
1139
1140                retval = -ESRCH;
1141                if(p) {
1142                        retval = security_task_getsid(p);
1143                        if (!retval)
1144                                retval = p->signal->session;
1145                }
1146                read_unlock(&tasklist_lock);
1147                return retval;
1148        }
1149}
1150
1151asmlinkage long sys_setsid(void)
1152{
1153        struct pid *pid;
1154        int err = -EPERM;
1155
1156        if (!thread_group_leader(current))
1157                return -EINVAL;
1158
1159        down(&tty_sem);
1160        write_lock_irq(&tasklist_lock);
1161
1162        pid = find_pid(PIDTYPE_PGID, current->pid);
1163        if (pid)
1164                goto out;
1165
1166        current->signal->leader = 1;
1167        __set_special_pids(current->pid, current->pid);
1168        current->signal->tty = NULL;
1169        current->signal->tty_old_pgrp = 0;
1170        err = process_group(current);
1171out:
1172        write_unlock_irq(&tasklist_lock);
1173        up(&tty_sem);
1174        return err;
1175}
1176
1177/*
1178 * Supplementary group IDs
1179 */
1180
1181/* init to 2 - one for init_task, one to ensure it is never freed */
1182struct group_info init_groups = { .usage = ATOMIC_INIT(2) };
1183
1184struct group_info *groups_alloc(int gidsetsize)
1185{
1186        struct group_info *group_info;
1187        int nblocks;
1188        int i;
1189
1190        nblocks = (gidsetsize + NGROUPS_PER_BLOCK - 1) / NGROUPS_PER_BLOCK;
1191        /* Make sure we always allocate at least one indirect block pointer */
1192        nblocks = nblocks ? : 1;
1193        group_info = kmalloc(sizeof(*group_info) + nblocks*sizeof(gid_t *), GFP_USER);
1194        if (!group_info)
1195                return NULL;
1196        group_info->ngroups = gidsetsize;
1197        group_info->nblocks = nblocks;
1198        atomic_set(&group_info->usage, 1);
1199
1200        if (gidsetsize <= NGROUPS_SMALL) {
1201                group_info->blocks[0] = group_info->small_block;
1202        } else {
1203                for (i = 0; i < nblocks; i++) {
1204                        gid_t *b;
1205                        b = (void *)__get_free_page(GFP_USER);
1206                        if (!b)
1207                                goto out_undo_partial_alloc;
1208                        group_info->blocks[i] = b;
1209                }
1210        }
1211        return group_info;
1212
1213out_undo_partial_alloc:
1214        while (--i >= 0) {
1215                free_page((unsigned long)group_info->blocks[i]);
1216        }
1217        kfree(group_info);
1218        return NULL;
1219}
1220
1221EXPORT_SYMBOL(groups_alloc);
1222
1223void groups_free(struct group_info *group_info)
1224{
1225        if (group_info->blocks[0] != group_info->small_block) {
1226                int i;
1227                for (i = 0; i < group_info->nblocks; i++)
1228                        free_page((unsigned long)group_info->blocks[i]);
1229        }
1230        kfree(group_info);
1231}
1232
1233EXPORT_SYMBOL(groups_free);
1234
1235/* export the group_info to a user-space array */
1236static int groups_to_user(gid_t __user *grouplist,
1237    struct group_info *group_info)
1238{
1239        int i;
1240        int count = group_info->ngroups;
1241
1242        for (i = 0; i < group_info->nblocks; i++) {
1243                int cp_count = min(NGROUPS_PER_BLOCK, count);
1244                int off = i * NGROUPS_PER_BLOCK;
1245                int len = cp_count * sizeof(*grouplist);
1246
1247                if (copy_to_user(grouplist+off, group_info->blocks[i], len))
1248                        return -EFAULT;
1249
1250                count -= cp_count;
1251        }
1252        return 0;
1253}
1254
1255/* fill a group_info from a user-space array - it must be allocated already */
1256static int groups_from_user(struct group_info *group_info,
1257    gid_t __user *grouplist)
1258 {
1259        int i;
1260        int count = group_info->ngroups;
1261
1262        for (i = 0; i < group_info->nblocks; i++) {
1263                int cp_count = min(NGROUPS_PER_BLOCK, count);
1264                int off = i * NGROUPS_PER_BLOCK;
1265                int len = cp_count * sizeof(*grouplist);
1266
1267                if (copy_from_user(group_info->blocks[i], grouplist+off, len))
1268                        return -EFAULT;
1269
1270                count -= cp_count;
1271        }
1272        return 0;
1273}
1274
1275/* a simple shell-metzner sort */
1276static void groups_sort(struct group_info *group_info)
1277{
1278        int base, max, stride;
1279        int gidsetsize = group_info->ngroups;
1280
1281        for (stride = 1; stride < gidsetsize; stride = 3 * stride + 1)
1282                ; /* nothing */
1283        stride /= 3;
1284
1285        while (stride) {
1286                max = gidsetsize - stride;
1287                for (base = 0; base < max; base++) {
1288                        int left = base;
1289                        int right = left + stride;
1290                        gid_t tmp = GROUP_AT(group_info, right);
1291
1292                        while (left >= 0 && GROUP_AT(group_info, left) > tmp) {
1293                                GROUP_AT(group_info, right) =
1294                                    GROUP_AT(group_info, left);
1295                                right = left;
1296                                left -= stride;
1297                        }
1298                        GROUP_AT(group_info, right) = tmp;
1299                }
1300                stride /= 3;
1301        }
1302}
1303
1304/* a simple bsearch */
1305int groups_search(struct group_info *group_info, gid_t grp)
1306{
1307        int left, right;
1308
1309        if (!group_info)
1310                return 0;
1311
1312        left = 0;
1313        right = group_info->ngroups;
1314        while (left < right) {
1315                int mid = (left+right)/2;
1316                int cmp = grp - GROUP_AT(group_info, mid);
1317                if (cmp > 0)
1318                        left = mid + 1;
1319                else if (cmp < 0)
1320                        right = mid;
1321                else
1322                        return 1;
1323        }
1324        return 0;
1325}
1326
1327/* validate and set current->group_info */
1328int set_current_groups(struct group_info *group_info)
1329{
1330        int retval;
1331        struct group_info *old_info;
1332
1333        retval = security_task_setgroups(group_info);
1334        if (retval)
1335                return retval;
1336
1337        groups_sort(group_info);
1338        get_group_info(group_info);
1339
1340        task_lock(current);
1341        old_info = current->group_info;
1342        current->group_info = group_info;
1343        task_unlock(current);
1344
1345        put_group_info(old_info);
1346
1347        return 0;
1348}
1349
1350EXPORT_SYMBOL(set_current_groups);
1351
1352asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist)
1353{
1354        int i = 0;
1355
1356        /*
1357         *      SMP: Nobody else can change our grouplist. Thus we are
1358         *      safe.
1359         */
1360
1361        if (gidsetsize < 0)
1362                return -EINVAL;
1363
1364        /* no need to grab task_lock here; it cannot change */
1365        get_group_info(current->group_info);
1366        i = current->group_info->ngroups;
1367        if (gidsetsize) {
1368                if (i > gidsetsize) {
1369                        i = -EINVAL;
1370                        goto out;
1371                }
1372                if (groups_to_user(grouplist, current->group_info)) {
1373                        i = -EFAULT;
1374                        goto out;
1375                }
1376        }
1377out:
1378        put_group_info(current->group_info);
1379        return i;
1380}
1381
1382/*
1383 *      SMP: Our groups are copy-on-write. We can set them safely
1384 *      without another task interfering.
1385 */
1386 
1387asmlinkage long sys_setgroups(int gidsetsize, gid_t __user *grouplist)
1388{
1389        struct group_info *group_info;
1390        int retval;
1391
1392        if (!capable(CAP_SETGID))
1393                return -EPERM;
1394        if ((unsigned)gidsetsize > NGROUPS_MAX)
1395                return -EINVAL;
1396
1397        group_info = groups_alloc(gidsetsize);
1398        if (!group_info)
1399                return -ENOMEM;
1400        retval = groups_from_user(group_info, grouplist);
1401        if (retval) {
1402                put_group_info(group_info);
1403                return retval;
1404        }
1405
1406        retval = set_current_groups(group_info);
1407        put_group_info(group_info);
1408
1409        return retval;
1410}
1411
1412/*
1413 * Check whether we're fsgid/egid or in the supplemental group..
1414 */
1415int in_group_p(gid_t grp)
1416{
1417        int retval = 1;
1418        if (grp != current->fsgid) {
1419                get_group_info(current->group_info);
1420                retval = groups_search(current->group_info, grp);
1421                put_group_info(current->group_info);
1422        }
1423        return retval;
1424}
1425
1426EXPORT_SYMBOL(in_group_p);
1427
1428int in_egroup_p(gid_t grp)
1429{
1430        int retval = 1;
1431        if (grp != current->egid) {
1432                get_group_info(current->group_info);
1433                retval = groups_search(current->group_info, grp);
1434                put_group_info(current->group_info);
1435        }
1436        return retval;
1437}
1438
1439EXPORT_SYMBOL(in_egroup_p);
1440
/*
 * Read/write semaphore taken around accesses to system_utsname by the
 * uname, hostname and domainname system calls in this file.
 */
DECLARE_RWSEM(uts_sem);
1442
1443EXPORT_SYMBOL(uts_sem);
1444
1445asmlinkage long sys_newuname(struct new_utsname __user * name)
1446{
1447        int errno = 0;
1448
1449        down_read(&uts_sem);
1450        if (copy_to_user(name,&system_utsname,sizeof *name))
1451                errno = -EFAULT;
1452        up_read(&uts_sem);
1453        return errno;
1454}
1455
1456asmlinkage long sys_sethostname(char __user *name, int len)
1457{
1458        int errno;
1459        char tmp[__NEW_UTS_LEN];
1460
1461        if (!capable(CAP_SYS_ADMIN))
1462                return -EPERM;
1463        if (len < 0 || len > __NEW_UTS_LEN)
1464                return -EINVAL;
1465        down_write(&uts_sem);
1466        errno = -EFAULT;
1467        if (!copy_from_user(tmp, name, len)) {
1468                memcpy(system_utsname.nodename, tmp, len);
1469                system_utsname.nodename[len] = 0;
1470                errno = 0;
1471        }
1472        up_write(&uts_sem);
1473        return errno;
1474}
1475
1476#ifdef __ARCH_WANT_SYS_GETHOSTNAME
1477
1478asmlinkage long sys_gethostname(char __user *name, int len)
1479{
1480        int i, errno;
1481
1482        if (len < 0)
1483                return -EINVAL;
1484        down_read(&uts_sem);
1485        i = 1 + strlen(system_utsname.nodename);
1486        if (i > len)
1487                i = len;
1488        errno = 0;
1489        if (copy_to_user(name, system_utsname.nodename, i))
1490                errno = -EFAULT;
1491        up_read(&uts_sem);
1492        return errno;
1493}
1494
1495#endif
1496
1497/*
1498 * Only setdomainname; getdomainname can be implemented by calling
1499 * uname()
1500 */
1501asmlinkage long sys_setdomainname(char __user *name, int len)
1502{
1503        int errno;
1504        char tmp[__NEW_UTS_LEN];
1505
1506        if (!capable(CAP_SYS_ADMIN))
1507                return -EPERM;
1508        if (len < 0 || len > __NEW_UTS_LEN)
1509                return -EINVAL;
1510
1511        down_write(&uts_sem);
1512        errno = -EFAULT;
1513        if (!copy_from_user(tmp, name, len)) {
1514                memcpy(system_utsname.domainname, tmp, len);
1515                system_utsname.domainname[len] = 0;
1516                errno = 0;
1517        }
1518        up_write(&uts_sem);
1519        return errno;
1520}
1521
1522asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit __user *rlim)
1523{
1524        if (resource >= RLIM_NLIMITS)
1525                return -EINVAL;
1526        else
1527                return copy_to_user(rlim, current->rlim + resource, sizeof(*rlim))
1528                        ? -EFAULT : 0;
1529}
1530
1531#ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT
1532
1533/*
1534 *      Back compatibility for getrlimit. Needed for some apps.
1535 */
1536 
1537asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *rlim)
1538{
1539        struct rlimit x;
1540        if (resource >= RLIM_NLIMITS)
1541                return -EINVAL;
1542
1543        memcpy(&x, current->rlim + resource, sizeof(*rlim));
1544        if(x.rlim_cur > 0x7FFFFFFF)
1545                x.rlim_cur = 0x7FFFFFFF;
1546        if(x.rlim_max > 0x7FFFFFFF)
1547                x.rlim_max = 0x7FFFFFFF;
1548        return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0;
1549}
1550
1551#endif
1552
1553asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
1554{
1555        struct rlimit new_rlim, *old_rlim;
1556        int retval;
1557
1558        if (resource >= RLIM_NLIMITS)
1559                return -EINVAL;
1560        if(copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
1561                return -EFAULT;
1562       if (new_rlim.rlim_cur > new_rlim.rlim_max)
1563               return -EINVAL;
1564        old_rlim = current->rlim + resource;
1565        if (((new_rlim.rlim_cur > old_rlim->rlim_max) ||
1566             (new_rlim.rlim_max > old_rlim->rlim_max)) &&
1567            !capable(CAP_SYS_RESOURCE))
1568                return -EPERM;
1569        if (resource == RLIMIT_NOFILE) {
1570                if (new_rlim.rlim_cur > NR_OPEN || new_rlim.rlim_max > NR_OPEN)
1571                        return -EPERM;
1572        }
1573
1574        retval = security_task_setrlimit(resource, &new_rlim);
1575        if (retval)
1576                return retval;
1577
1578        *old_rlim = new_rlim;
1579        return 0;
1580}
1581
1582/*
1583 * It would make sense to put struct rusage in the task_struct,
1584 * except that would make the task_struct be *really big*.  After
1585 * task_struct gets moved into malloc'ed memory, it would
1586 * make sense to do this.  It will make moving the rest of the information
1587 * a lot simpler!  (Which we're not doing right now because we're not
1588 * measuring them yet).
1589 *
1590 * This expects to be called with tasklist_lock read-locked or better,
1591 * and the siglock not locked.  It may momentarily take the siglock.
1592 *
1593 * When sampling multiple threads for RUSAGE_SELF, under SMP we might have
1594 * races with threads incrementing their own counters.  But since word
1595 * reads are atomic, we either get new values or old values and we don't
1596 * care which for the sums.  We always take the siglock to protect reading
1597 * the c* fields from p->signal from races with exit.c updating those
1598 * fields when reaping, so a sample either gets all the additions of a
1599 * given child after it's reaped, or none so this sample is before reaping.
1600 */
1601
1602void k_getrusage(struct task_struct *p, int who, struct rusage *r)
1603{
1604        struct task_struct *t;
1605        unsigned long flags;
1606        unsigned long utime, stime;
1607
1608        memset((char *) r, 0, sizeof *r);
1609
1610        if (unlikely(!p->signal))
1611                return;
1612
1613        switch (who) {
1614                case RUSAGE_CHILDREN:
1615                        spin_lock_irqsave(&p->sighand->siglock, flags);
1616                        utime = p->signal->cutime;
1617                        stime = p->signal->cstime;
1618                        r->ru_nvcsw = p->signal->cnvcsw;
1619                        r->ru_nivcsw = p->signal->cnivcsw;
1620                        r->ru_minflt = p->signal->cmin_flt;
1621                        r->ru_majflt = p->signal->cmaj_flt;
1622                        r->ru_inblock = p->signal->inblock;
1623                        r->ru_oublock = p->signal->oublock;
1624                        spin_unlock_irqrestore(&p->sighand->siglock, flags);
1625                        jiffies_to_timeval(utime, &r->ru_utime);
1626                        jiffies_to_timeval(stime, &r->ru_stime);
1627                        break;
1628                case RUSAGE_SELF:
1629                        spin_lock_irqsave(&p->sighand->siglock, flags);
1630                        utime = stime = 0;
1631                        goto sum_group;
1632                case RUSAGE_BOTH:
1633                        spin_lock_irqsave(&p->sighand->siglock, flags);
1634                        utime = p->signal->cutime;
1635                        stime = p->signal->cstime;
1636                        r->ru_nvcsw = p->signal->cnvcsw;
1637                        r->ru_nivcsw = p->signal->cnivcsw;
1638                        r->ru_minflt = p->signal->cmin_flt;
1639                        r->ru_majflt = p->signal->cmaj_flt;
1640                        r->ru_inblock = p->signal->inblock;
1641                        r->ru_oublock = p->signal->oublock;
1642                sum_group:
1643                        utime += p->signal->utime;
1644                        stime += p->signal->stime;
1645                        r->ru_nvcsw += p->signal->nvcsw;
1646                        r->ru_nivcsw += p->signal->nivcsw;
1647                        r->ru_minflt += p->signal->min_flt;
1648                        r->ru_majflt += p->signal->maj_flt;
1649                        r->ru_inblock += p->signal->inblock;
1650                        r->ru_oublock += p->signal->oublock;
1651                        t = p;
1652                        do {
1653                                utime += t->utime;
1654                                stime += t->stime;
1655                                r->ru_nvcsw += t->nvcsw;
1656                                r->ru_nivcsw += t->nivcsw;
1657                                r->ru_minflt += t->min_flt;
1658                                r->ru_majflt += t->maj_flt;
1659                                r->ru_inblock += task_io_get_inblock(t);
1660                                r->ru_oublock += task_io_get_oublock(t);
1661                                t = next_thread(t);
1662                        } while (t != p);
1663                        spin_unlock_irqrestore(&p->sighand->siglock, flags);
1664                        jiffies_to_timeval(utime, &r->ru_utime);
1665                        jiffies_to_timeval(stime, &r->ru_stime);
1666                        break;
1667                default:
1668                        BUG();
1669        }
1670}
1671
1672int getrusage(struct task_struct *p, int who, struct rusage __user *ru)
1673{
1674        struct rusage r;
1675        read_lock(&tasklist_lock);
1676        k_getrusage(p, who, &r);
1677        read_unlock(&tasklist_lock);
1678        return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
1679}
1680
1681asmlinkage long sys_getrusage(int who, struct rusage __user *ru)
1682{
1683        if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN)
1684                return -EINVAL;
1685        return getrusage(current, who, ru);
1686}
1687
1688asmlinkage long sys_umask(int mask)
1689{
1690        mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
1691        return mask;
1692}
1693    
1694asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
1695                          unsigned long arg4, unsigned long arg5)
1696{
1697        int error;
1698        int sig;
1699
1700        error = security_task_prctl(option, arg2, arg3, arg4, arg5);
1701        if (error)
1702                return error;
1703
1704        switch (option) {
1705                case PR_SET_PDEATHSIG:
1706                        sig = arg2;
1707                        if (sig < 0 || sig > _NSIG) {
1708                                error = -EINVAL;
1709                                break;
1710                        }
1711                        current->pdeath_signal = sig;
1712                        break;
1713                case PR_GET_PDEATHSIG:
1714                        error = put_user(current->pdeath_signal, (int __user *)arg2);
1715                        break;
1716                case PR_GET_DUMPABLE:
1717                        if (current->mm->dumpable)
1718                                error = 1;
1719                        break;
1720                case PR_SET_DUMPABLE:
1721                        if (arg2 < 0 || arg2 > 1) {
1722                                error = -EINVAL;
1723                                break;
1724                        }
1725                        current->mm->dumpable = arg2;
1726                        break;
1727
1728                case PR_SET_UNALIGN:
1729                        error = SET_UNALIGN_CTL(current, arg2);
1730                        break;
1731                case PR_GET_UNALIGN:
1732                        error = GET_UNALIGN_CTL(current, arg2);
1733                        break;
1734                case PR_SET_FPEMU:
1735                        error = SET_FPEMU_CTL(current, arg2);
1736                        break;
1737                case PR_GET_FPEMU:
1738                        error = GET_FPEMU_CTL(current, arg2);
1739                        break;
1740                case PR_SET_FPEXC:
1741                        error = SET_FPEXC_CTL(current, arg2);
1742                        break;
1743                case PR_GET_FPEXC:
1744                        error = GET_FPEXC_CTL(current, arg2);
1745                        break;
1746                case PR_GET_TIMING:
1747                        error = PR_TIMING_STATISTICAL;
1748                        break;
1749                case PR_SET_TIMING:
1750                        if (arg2 == PR_TIMING_STATISTICAL)
1751                                error = 0;
1752                        else
1753                                error = -EINVAL;
1754                        break;
1755
1756                case PR_GET_KEEPCAPS:
1757                        if (current->keep_capabilities)
1758                                error = 1;
1759                        break;
1760                case PR_SET_KEEPCAPS:
1761                        if (arg2 != 0 && arg2 != 1) {
1762                                error = -EINVAL;
1763                                break;
1764                        }
1765                        current->keep_capabilities = arg2;
1766                        break;
1767                case PR_SET_NAME: {
1768                        struct task_struct *me = current;
1769                        unsigned char ncomm[sizeof(me->comm)];
1770
1771                        ncomm[sizeof(me->comm)-1] = 0;
1772                        if (strncpy_from_user(ncomm, (char __user *)arg2,
1773                                                sizeof(me->comm)-1) < 0)
1774                                return -EFAULT;
1775                        set_task_comm(me, ncomm);
1776                        return 0;
1777                }
1778                case PR_GET_ENDIAN:
1779                        error = GET_ENDIAN(current, arg2);
1780                        break;
1781                case PR_SET_ENDIAN:
1782                        error = SET_ENDIAN(current, arg2);
1783                        break;
1784
1785                default:
1786                        error = -EINVAL;
1787                        break;
1788        }
1789        return error;
1790}
1791
1792asmlinkage long sys_getcpu(unsigned __user *cpup, unsigned __user *nodep,
1793                           struct getcpu_cache __user *cache)
1794{
1795        int err = 0;
1796        int cpu = smp_processor_id();
1797        if (cpup)
1798                err |= put_user(cpu, cpup);
1799        if (nodep)
1800                err |= put_user(cpu_to_node(cpu), nodep);
1801        if (cache) {
1802                /*
1803                 * The cache is not needed for this implementation,
1804                 * but make sure user programs pass something
1805                 * valid. vsyscall implementations can instead make
1806                 * good use of the cache. Only use t0 and t1 because
1807                 * these are available in both 32bit and 64bit ABI (no
1808                 * need for a compat_getcpu). 32bit has enough
1809                 * padding
1810                 */
1811                unsigned long t0, t1;
1812                get_user(t0, &cache->blob[0]);
1813                get_user(t1, &cache->blob[1]);
1814                t0++;
1815                t1++;
1816                put_user(t0, &cache->blob[0]);
1817                put_user(t1, &cache->blob[1]);
1818        }
1819        return err ? -EFAULT : 0;
1820}
1821