1
2
3
4
5
6
7
8
9
10
11
12
13
14#include <linux/config.h>
15#include <linux/slab.h>
16#include <linux/init.h>
17#include <linux/unistd.h>
18#include <linux/smp_lock.h>
19#include <linux/module.h>
20#include <linux/vmalloc.h>
21#include <linux/completion.h>
22#include <linux/namespace.h>
23#include <linux/personality.h>
24#include <linux/mempolicy.h>
25#include <linux/sem.h>
26#include <linux/file.h>
27#include <linux/key.h>
28#include <linux/binfmts.h>
29#include <linux/mman.h>
30#include <linux/fs.h>
31#include <linux/cpu.h>
32#include <linux/security.h>
33#include <linux/swap.h>
34#include <linux/syscalls.h>
35#include <linux/jiffies.h>
36#include <linux/futex.h>
37#include <linux/task_io_accounting_ops.h>
38#include <linux/ptrace.h>
39#include <linux/mount.h>
40#include <linux/audit.h>
41#include <linux/profile.h>
42#include <linux/rmap.h>
43#include <linux/hash.h>
44
45#include <asm/pgtable.h>
46#include <asm/pgalloc.h>
47#include <asm/uaccess.h>
48#include <asm/mmu_context.h>
49#include <asm/cacheflush.h>
50#include <asm/tlbflush.h>
51
52
53
54
/* Count of tasks linked in by copy_process(); bumped under tasklist_lock. */
int nr_threads;

/* Ceiling for nr_threads: computed by fork_init(), checked by copy_process(). */
int max_threads;
/* Incremented by do_fork() each time copy_process() succeeds. */
unsigned long total_forks;

/*
 * Per-CPU counter that copy_process() bumps for every new thread group
 * leader with a non-zero pid; nr_processes() sums it over the online CPUs.
 */
DEFINE_PER_CPU(unsigned long, process_counts) = 0;

/* Write-held by copy_process() while the new task is linked in. */
rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED;

EXPORT_SYMBOL(tasklist_lock);
65
/*
 * Hash table that maps an mm_struct address to its struct mm_flags entry
 * (see __find_mm_flags()).  The accessors in this file take mm_flags_lock
 * around every lookup, insertion and removal.
 */
#define MM_FLAGS_HASH_BITS 10
#define MM_FLAGS_HASH_SIZE (1 << MM_FLAGS_HASH_BITS)
struct hlist_head mm_flags_hash[MM_FLAGS_HASH_SIZE] =
	{ [ 0 ... MM_FLAGS_HASH_SIZE - 1 ] = HLIST_HEAD_INIT };
DEFINE_SPINLOCK(mm_flags_lock);
/*
 * Number of low pointer bits dropped before hashing, selected from
 * sizeof(struct mm_struct): 10 for >= 1024 bytes, 9 for >= 512, else 8.
 */
#define MM_HASH_SHIFT ((sizeof(struct mm_struct) >= 1024) ? 10 \
			: (sizeof(struct mm_struct) >= 512) ? 9 \
			: 8)
/* Bucket index in mm_flags_hash[] for an mm_struct pointer. */
#define mm_flags_hash_fn(mm) \
	hash_long((unsigned long)(mm) >> MM_HASH_SHIFT, MM_FLAGS_HASH_BITS)
76
77int nr_processes(void)
78{
79 int cpu;
80 int total = 0;
81
82 for_each_online_cpu(cpu)
83 total += per_cpu(process_counts, cpu);
84
85 return total;
86}
87
#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
/*
 * Default task_struct allocator, used unless the architecture defines
 * __HAVE_ARCH_TASK_STRUCT_ALLOCATOR: objects come from the
 * task_struct_cachep slab cache that fork_init() creates.
 */
# define alloc_task_struct()	kmem_cache_alloc(task_struct_cachep, GFP_KERNEL)
# define free_task_struct(tsk)	kmem_cache_free(task_struct_cachep, (tsk))
static kmem_cache_t *task_struct_cachep;
#endif
93
94void free_task(struct task_struct *tsk)
95{
96 kfree(task_aux(tsk));
97 free_thread_info(tsk->thread_info);
98 free_task_struct(tsk);
99}
100EXPORT_SYMBOL(free_task);
101
102void __put_task_struct(struct task_struct *tsk)
103{
104 WARN_ON(!(tsk->exit_state & (EXIT_DEAD | EXIT_ZOMBIE)));
105 WARN_ON(atomic_read(&tsk->usage));
106 WARN_ON(tsk == current);
107
108 security_task_free(tsk);
109 free_uid(tsk->user);
110 put_group_info(tsk->group_info);
111
112 if (!profile_handoff_task(tsk))
113 free_task(tsk);
114}
115
116void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
117{
118 unsigned long flags;
119
120 wait->flags &= ~WQ_FLAG_EXCLUSIVE;
121 spin_lock_irqsave(&q->lock, flags);
122 __add_wait_queue(q, wait);
123 spin_unlock_irqrestore(&q->lock, flags);
124}
125
126EXPORT_SYMBOL(add_wait_queue);
127
128void fastcall add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait)
129{
130 unsigned long flags;
131
132 wait->flags |= WQ_FLAG_EXCLUSIVE;
133 spin_lock_irqsave(&q->lock, flags);
134 __add_wait_queue_tail(q, wait);
135 spin_unlock_irqrestore(&q->lock, flags);
136}
137
138EXPORT_SYMBOL(add_wait_queue_exclusive);
139
140void fastcall remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
141{
142 unsigned long flags;
143
144 spin_lock_irqsave(&q->lock, flags);
145 __remove_wait_queue(q, wait);
146 spin_unlock_irqrestore(&q->lock, flags);
147}
148
149EXPORT_SYMBOL(remove_wait_queue);
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164void fastcall prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
165{
166 unsigned long flags;
167
168 wait->flags &= ~WQ_FLAG_EXCLUSIVE;
169 spin_lock_irqsave(&q->lock, flags);
170 if (list_empty(&wait->task_list))
171 __add_wait_queue(q, wait);
172
173
174
175
176 if (is_sync_wait(wait))
177 set_current_state(state);
178 spin_unlock_irqrestore(&q->lock, flags);
179}
180
181EXPORT_SYMBOL(prepare_to_wait);
182
183void fastcall
184prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
185{
186 unsigned long flags;
187
188 wait->flags |= WQ_FLAG_EXCLUSIVE;
189 spin_lock_irqsave(&q->lock, flags);
190 if (list_empty(&wait->task_list))
191 __add_wait_queue_tail(q, wait);
192
193
194
195
196 if (is_sync_wait(wait))
197 set_current_state(state);
198 spin_unlock_irqrestore(&q->lock, flags);
199}
200
201EXPORT_SYMBOL(prepare_to_wait_exclusive);
202
203void fastcall finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
204{
205 unsigned long flags;
206
207 __set_current_state(TASK_RUNNING);
208
209
210
211
212
213
214
215
216
217
218
219
220
221 if (!list_empty_careful(&wait->task_list)) {
222 spin_lock_irqsave(&q->lock, flags);
223 list_del_init(&wait->task_list);
224 spin_unlock_irqrestore(&q->lock, flags);
225 }
226}
227
228EXPORT_SYMBOL(finish_wait);
229
230int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
231{
232 int ret = default_wake_function(wait, mode, sync, key);
233
234 if (ret)
235 list_del_init(&wait->task_list);
236 return ret;
237}
238
239EXPORT_SYMBOL(autoremove_wake_function);
240
241static struct task_struct_aux init_task_aux;
242
243void __init fork_init(unsigned long mempages)
244{
245 task_aux(current) = &init_task_aux;
246#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
247#ifndef ARCH_MIN_TASKALIGN
248#define ARCH_MIN_TASKALIGN L1_CACHE_BYTES
249#endif
250
251 task_struct_cachep =
252 kmem_cache_create("task_struct", sizeof(struct task_struct),
253 ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL, NULL);
254#endif
255
256
257
258
259
260
261 max_threads = mempages / (THREAD_SIZE/PAGE_SIZE) / 8;
262
263
264
265 if(max_threads < 20)
266 max_threads = 20;
267
268 init_task.rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
269 init_task.rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
270}
271
272static struct task_struct *dup_task_struct(struct task_struct *orig)
273{
274 struct task_struct_aux *aux;
275 struct task_struct *tsk;
276 struct thread_info *ti;
277
278 prepare_to_copy(orig);
279
280 tsk = alloc_task_struct();
281 if (!tsk)
282 return NULL;
283
284 ti = alloc_thread_info(tsk);
285 if (!ti) {
286 free_task_struct(tsk);
287 return NULL;
288 }
289
290 aux = kmalloc(sizeof(*aux), GFP_KERNEL);
291 if (!aux) {
292 free_thread_info(ti);
293 free_task_struct(tsk);
294 return NULL;
295 }
296
297 *ti = *orig->thread_info;
298 *aux = *task_aux(orig);
299 *tsk = *orig;
300 tsk->thread_info = ti;
301 ti->task = tsk;
302 task_aux(tsk) = aux;
303
304
305 atomic_set(&tsk->usage,2);
306 return tsk;
307}
308
309
310static struct mm_flags *__find_mm_flags(struct mm_struct *addr)
311{
312 struct hlist_head *head;
313 struct hlist_node *node;
314 struct mm_flags *p;
315
316 head = &mm_flags_hash[mm_flags_hash_fn(addr)];
317 hlist_for_each_entry(p, node, head, hlist) {
318 if (p->addr == addr)
319 return p;
320 }
321 return NULL;
322}
323
324unsigned long get_mm_flags(struct mm_struct *mm)
325{
326 struct mm_flags *p;
327 unsigned long flags = MMF_DUMP_FILTER_DEFAULT;
328
329 spin_lock(&mm_flags_lock);
330 p = __find_mm_flags(mm);
331 if (p)
332 flags = p->flags;
333 spin_unlock(&mm_flags_lock);
334
335 return flags;
336}
337
338int set_mm_flags(struct mm_struct *mm, unsigned long flags, int check_dup)
339{
340 struct mm_flags *p, *new_p;
341
342 flags &= MMF_DUMP_FILTER_MASK;
343
344 if (check_dup) {
345
346 spin_lock(&mm_flags_lock);
347 p = __find_mm_flags(mm);
348 if (p) {
349 p->flags = flags;
350 spin_unlock(&mm_flags_lock);
351 return 0;
352 }
353 spin_unlock(&mm_flags_lock);
354
355
356 if (flags == MMF_DUMP_FILTER_DEFAULT)
357 return 0;
358 }
359
360
361 new_p = kmalloc(sizeof(*new_p), GFP_KERNEL);
362 if (!new_p)
363 return -ENOMEM;
364
365 spin_lock(&mm_flags_lock);
366 if (!check_dup || !(p = __find_mm_flags(mm))) {
367 struct hlist_head *head;
368 head = &mm_flags_hash[mm_flags_hash_fn(mm)];
369 p = new_p;
370 p->addr = mm;
371 hlist_add_head(&p->hlist, head);
372 } else
373 kfree(new_p);
374 p->flags = flags;
375 spin_unlock(&mm_flags_lock);
376
377 return 0;
378}
379
380static void free_mm_flags(struct mm_struct *mm) {
381 struct mm_flags *p;
382
383 spin_lock(&mm_flags_lock);
384 p = __find_mm_flags(mm);
385 if (p) {
386 hlist_del(&p->hlist);
387 kfree(p);
388 }
389 spin_unlock(&mm_flags_lock);
390}
391
392#ifdef CONFIG_MMU
393static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
394{
395 struct vm_area_struct * mpnt, *tmp, **pprev;
396 struct rb_node **rb_link, *rb_parent;
397 int retval;
398 unsigned long charge;
399 struct mempolicy *pol;
400
401 down_write(&oldmm->mmap_sem);
402 flush_cache_mm(current->mm);
403 mm->locked_vm = 0;
404 mm->mmap = NULL;
405 mm->mmap_cache = NULL;
406 mm->free_area_cache = oldmm->mmap_base;
407 mm->map_count = 0;
408 mm->rss = 0;
409 mm->anon_rss = 0;
410 cpus_clear(mm->cpu_vm_mask);
411 mm->mm_rb = RB_ROOT;
412 rb_link = &mm->mm_rb.rb_node;
413 rb_parent = NULL;
414 pprev = &mm->mmap;
415
416
417
418
419
420
421
422 spin_lock(&mmlist_lock);
423 list_add(&mm->mmlist, ¤t->mm->mmlist);
424 mmlist_nr++;
425 spin_unlock(&mmlist_lock);
426
427 for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) {
428 struct file *file;
429
430 if (mpnt->vm_flags & VM_DONTCOPY) {
431 __vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file,
432 -vma_pages(mpnt));
433 continue;
434 }
435 charge = 0;
436 if (mpnt->vm_flags & VM_ACCOUNT) {
437 unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
438 if (security_vm_enough_memory(len))
439 goto fail_nomem;
440 charge = len;
441 }
442 tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
443 if (!tmp)
444 goto fail_nomem;
445 *tmp = *mpnt;
446 pol = mpol_copy(vma_policy(mpnt));
447 retval = PTR_ERR(pol);
448 if (IS_ERR(pol))
449 goto fail_nomem_policy;
450 vma_set_policy(tmp, pol);
451 tmp->vm_flags &= ~VM_LOCKED;
452 tmp->vm_mm = mm;
453 tmp->vm_next = NULL;
454 anon_vma_link(tmp);
455 file = tmp->vm_file;
456 if (file) {
457 struct inode *inode = file->f_dentry->d_inode;
458 get_file(file);
459 if (tmp->vm_flags & VM_DENYWRITE)
460 atomic_dec(&inode->i_writecount);
461
462
463 spin_lock(&file->f_mapping->i_mmap_lock);
464 flush_dcache_mmap_lock(file->f_mapping);
465 vma_prio_tree_add(tmp, mpnt);
466 flush_dcache_mmap_unlock(file->f_mapping);
467 spin_unlock(&file->f_mapping->i_mmap_lock);
468 }
469
470
471
472
473
474
475 spin_lock(&mm->page_table_lock);
476 *pprev = tmp;
477 pprev = &tmp->vm_next;
478
479 __vma_link_rb(mm, tmp, rb_link, rb_parent);
480 rb_link = &tmp->vm_rb.rb_right;
481 rb_parent = &tmp->vm_rb;
482
483 mm->map_count++;
484 retval = copy_page_range(mm, current->mm, tmp);
485 spin_unlock(&mm->page_table_lock);
486
487 if (tmp->vm_ops && tmp->vm_ops->open)
488 tmp->vm_ops->open(tmp);
489
490 if (retval)
491 goto out;
492 }
493#ifdef arch_dup_mmap
494 arch_dup_mmap(mm, oldmm);
495#endif
496 retval = 0;
497
498out:
499 flush_tlb_mm(current->mm);
500 up_write(&oldmm->mmap_sem);
501 return retval;
502fail_nomem_policy:
503 kmem_cache_free(vm_area_cachep, tmp);
504fail_nomem:
505 retval = -ENOMEM;
506 vm_unacct_memory(charge);
507 goto out;
508}
509
510static inline int mm_alloc_pgd(struct mm_struct * mm)
511{
512 mm->pgd = pgd_alloc(mm);
513 if (unlikely(!mm->pgd))
514 return -ENOMEM;
515 return 0;
516}
517
518static inline void mm_free_pgd(struct mm_struct * mm)
519{
520 pgd_free(mm->pgd);
521}
522#else
/*
 * Without CONFIG_MMU these helpers collapse to no-ops: dup_mmap() and
 * mm_alloc_pgd() always evaluate to 0 and mm_free_pgd() expands to
 * nothing.
 */
#define dup_mmap(mm, oldmm)	(0)
#define mm_alloc_pgd(mm)	(0)
#define mm_free_pgd(mm)
526#endif
527
/*
 * mmlist_lock guards the mm list links and mmlist_nr: dup_mmap() adds
 * to the list and mmput() removes from it while holding the lock.
 */
spinlock_t mmlist_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
/* Number of mm_structs currently on the mm list. */
int mmlist_nr;

/* Allocate / free an mm_struct from the mm_cachep slab cache. */
#define allocate_mm()	(kmem_cache_alloc(mm_cachep, SLAB_KERNEL))
#define free_mm(mm)	(kmem_cache_free(mm_cachep, (mm)))
533
534#include <linux/init_task.h>
535
536static struct mm_struct * mm_init(struct mm_struct * mm)
537{
538 unsigned long mm_flags;
539
540 atomic_set(&mm->mm_users, 1);
541 atomic_set(&mm->mm_count, 1);
542 init_rwsem(&mm->mmap_sem);
543 mm->core_waiters = 0;
544 mm->page_table_lock = SPIN_LOCK_UNLOCKED;
545 mm->ioctx_list_lock = RW_LOCK_UNLOCKED;
546 mm->ioctx_list = NULL;
547 mm->default_kioctx = (struct kioctx)INIT_KIOCTX(mm->default_kioctx, *mm);
548 mm->free_area_cache = TASK_UNMAPPED_BASE;
549
550 mm_flags = get_mm_flags(current->mm);
551 if (mm_flags != MMF_DUMP_FILTER_DEFAULT) {
552 if (unlikely(set_mm_flags(mm, mm_flags, 0) < 0))
553 goto fail_nomem;
554 }
555
556 if (likely(!mm_alloc_pgd(mm))) {
557 mm->def_flags = 0;
558 return mm;
559 }
560
561 if (mm_flags != MMF_DUMP_FILTER_DEFAULT)
562 free_mm_flags(mm);
563fail_nomem:
564 free_mm(mm);
565 return NULL;
566}
567
568
569
570
571struct mm_struct * mm_alloc(void)
572{
573 struct mm_struct * mm;
574
575 mm = allocate_mm();
576 if (mm) {
577 memset(mm, 0, sizeof(*mm));
578 mm = mm_init(mm);
579 }
580 return mm;
581}
582
583
584
585
586
587
588void fastcall __mmdrop(struct mm_struct *mm)
589{
590 BUG_ON(mm == &init_mm);
591 free_mm_flags(mm);
592 mm_free_pgd(mm);
593 destroy_context(mm);
594 free_mm(mm);
595}
596
597
598
599
600void mmput(struct mm_struct *mm)
601{
602 if (atomic_dec_and_lock(&mm->mm_users, &mmlist_lock)) {
603 list_del(&mm->mmlist);
604 mmlist_nr--;
605 spin_unlock(&mmlist_lock);
606 exit_aio(mm);
607 exit_mmap(mm);
608 put_swap_token(mm);
609 mmdrop(mm);
610 }
611}
612EXPORT_SYMBOL_GPL(mmput);
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627struct mm_struct *get_task_mm(struct task_struct *task)
628{
629 struct mm_struct *mm;
630
631 task_lock(task);
632 mm = task->mm;
633 if (mm) {
634 spin_lock(&mmlist_lock);
635 if (!atomic_read(&mm->mm_users))
636 mm = NULL;
637 else
638 atomic_inc(&mm->mm_users);
639 spin_unlock(&mmlist_lock);
640 }
641 task_unlock(task);
642 return mm;
643}
644EXPORT_SYMBOL_GPL(get_task_mm);
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659void mm_release(struct task_struct *tsk, struct mm_struct *mm)
660{
661 struct completion *vfork_done = task_aux(tsk)->vfork_done;
662
663
664 deactivate_mm(tsk, mm);
665
666
667 if (vfork_done) {
668 task_aux(tsk)->vfork_done = NULL;
669 complete(vfork_done);
670 }
671
672
673
674
675
676
677
678 if (tsk->clear_child_tid
679 && !(tsk->flags & PF_SIGNALED)
680 && atomic_read(&mm->mm_users) > 1) {
681 u32 __user * tidptr = tsk->clear_child_tid;
682 tsk->clear_child_tid = NULL;
683
684
685
686
687
688 put_user(0, tidptr);
689 sys_futex(tidptr, FUTEX_WAKE, 1, NULL, NULL, 0);
690 }
691}
692
/*
 * Set up the address space of the new task @tsk.
 *
 * With CLONE_VM the child shares the current task's mm (mm_users is
 * incremented); otherwise a new mm is allocated, initialised and filled
 * by dup_mmap().  A current task without an mm leaves tsk->mm and
 * tsk->active_mm NULL.
 *
 * Returns 0 on success or a negative errno.
 */
static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
{
	struct mm_struct * mm, *oldmm;
	int retval;

	/* The child starts with fresh fault and context-switch counters. */
	tsk->min_flt = tsk->maj_flt = 0;
	tsk->nvcsw = tsk->nivcsw = 0;

	tsk->mm = NULL;
	tsk->active_mm = NULL;

	/* No mm on the current task: nothing to share or copy. */
	oldmm = current->mm;
	if (!oldmm)
		return 0;

	if (clone_flags & CLONE_VM) {
		atomic_inc(&oldmm->mm_users);
		mm = oldmm;

		/* Wait for any current holder of page_table_lock to drop it. */
		spin_unlock_wait(&oldmm->page_table_lock);
		goto good_mm;
	}

	retval = -ENOMEM;
	mm = allocate_mm();
	if (!mm)
		goto fail_nomem;

	/* Start from a byte copy of the parent's mm, then re-initialise. */
	memcpy(mm, oldmm, sizeof(*mm));
	/* mm_init() frees mm itself on failure, so just bail out. */
	if (!mm_init(mm))
		goto fail_nomem;

	if (init_new_context(tsk,mm))
		goto fail_nocontext;

	retval = dup_mmap(mm, oldmm);
	if (retval)
		goto free_pt;

good_mm:
	tsk->mm = mm;
	tsk->active_mm = mm;
	return 0;

free_pt:
	mmput(mm);
fail_nomem:
	return retval;

fail_nocontext:
	/*
	 * init_new_context() failed, so undo only what mm_init() set up
	 * (mm flags entry, pgd, the mm itself) instead of calling mmput().
	 */
	free_mm_flags(mm);
	mm_free_pgd(mm);
	free_mm(mm);
	return retval;
}
763
764static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old)
765{
766 struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
767
768 if (fs) {
769 atomic_set(&fs->count, 1);
770 fs->lock = RW_LOCK_UNLOCKED;
771 fs->umask = old->umask;
772 read_lock(&old->lock);
773 fs->rootmnt = mntget(old->rootmnt);
774 fs->root = dget(old->root);
775 fs->pwdmnt = mntget(old->pwdmnt);
776 fs->pwd = dget(old->pwd);
777 if (old->altroot) {
778 fs->altrootmnt = mntget(old->altrootmnt);
779 fs->altroot = dget(old->altroot);
780 } else {
781 fs->altrootmnt = NULL;
782 fs->altroot = NULL;
783 }
784 read_unlock(&old->lock);
785 }
786 return fs;
787}
788
/*
 * Exported wrapper around __copy_fs_struct(): returns a new copy of
 * @old, or NULL on allocation failure.
 */
struct fs_struct *copy_fs_struct(struct fs_struct *old)
{
	struct fs_struct *copy = __copy_fs_struct(old);

	return copy;
}

EXPORT_SYMBOL_GPL(copy_fs_struct);
795
796static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
797{
798 if (clone_flags & CLONE_FS) {
799 atomic_inc(¤t->fs->count);
800 return 0;
801 }
802 tsk->fs = __copy_fs_struct(current->fs);
803 if (!tsk->fs)
804 return -ENOMEM;
805 return 0;
806}
807
808static int count_open_files(struct files_struct *files, int size)
809{
810 int i;
811
812
813 for (i = size/(8*sizeof(long)); i > 0; ) {
814 if (files->open_fds->fds_bits[--i])
815 break;
816 }
817 i = (i+1) * 8 * sizeof(long);
818 return i;
819}
820
/*
 * Set up the file descriptor table of the new task @tsk.
 *
 * With CLONE_FILES the current task's files_struct is shared (its count
 * is raised and tsk->files is left untouched here).  Otherwise a new
 * files_struct is allocated and the open descriptors, the open_fds
 * bitmap and the close_on_exec bitmap are copied from the current task,
 * taking a reference on every open file.
 *
 * Returns 0 on success or a negative errno; on failure after the
 * allocation tsk->files is left NULL.
 */
static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
{
	struct files_struct *oldf, *newf;
	struct file **old_fds, **new_fds;
	int open_files, nfds, size, i, error = 0;

	/* A current task without a files_struct has nothing to copy. */
	oldf = current->files;
	if (!oldf)
		goto out;

	if (clone_flags & CLONE_FILES) {
		atomic_inc(&oldf->count);
		goto out;
	}

	/*
	 * Clear tsk->files up front so that every error path below leaves
	 * the new task without a descriptor table.
	 */
	tsk->files = NULL;
	error = -ENOMEM;
	newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL);
	if (!newf) 
		goto out;

	atomic_set(&newf->count, 1);

	/* Start out with the embedded fixed-size fd array and bitmaps. */
	newf->file_lock	    = SPIN_LOCK_UNLOCKED;
	newf->next_fd	    = 0;
	newf->max_fds	    = NR_OPEN_DEFAULT;
	newf->max_fdset	    = __FD_SETSIZE;
	newf->close_on_exec = &newf->close_on_exec_init;
	newf->open_fds	    = &newf->open_fds_init;
	newf->fd	    = &newf->fd_array[0];

	/* Grow the bitmaps first if the parent's are larger than the default. */
	size = oldf->max_fdset;
	if (size > __FD_SETSIZE) {
		newf->max_fdset = 0;
		spin_lock(&newf->file_lock);
		error = expand_fdset(newf, size-1);
		spin_unlock(&newf->file_lock);
		if (error)
			goto out_release;
	}
	spin_lock(&oldf->file_lock);

	open_files = count_open_files(oldf, size);

	/*
	 * Grow the fd array if the parent has more descriptors in use than
	 * the embedded array holds.  oldf->file_lock is dropped around the
	 * expansion and re-taken afterwards; open_files is not recomputed.
	 */
	nfds = NR_OPEN_DEFAULT;
	if (open_files > nfds) {
		spin_unlock(&oldf->file_lock);
		newf->max_fds = 0;
		spin_lock(&newf->file_lock);
		error = expand_fd_array(newf, open_files-1);
		spin_unlock(&newf->file_lock);
		if (error) 
			goto out_release;
		nfds = newf->max_fds;
		spin_lock(&oldf->file_lock);
	}

	old_fds = oldf->fd;
	new_fds = newf->fd;

	/* open_files is a bit count; the bitmaps are copied byte-wise. */
	memcpy(newf->open_fds->fds_bits, oldf->open_fds->fds_bits, open_files/8);
	memcpy(newf->close_on_exec->fds_bits, oldf->close_on_exec->fds_bits, open_files/8);

	for (i = open_files; i != 0; i--) {
		struct file *f = *old_fds++;
		if (f) {
			get_file(f);
		} else {
			/*
			 * The slot has no file: make sure the corresponding
			 * bit in the copied open_fds bitmap is clear too.
			 */
			FD_CLR(open_files - i, newf->open_fds);
		}
		*new_fds++ = f;
	}
	spin_unlock(&oldf->file_lock);

	/* Bytes left in the new fd array beyond the copied entries. */
	size = (newf->max_fds - open_files) * sizeof(struct file *);

	/* Zero the unused tail of the fd array. */
	memset(new_fds, 0, size); 

	/* Zero the unused tail of both bitmaps as well. */
	if (newf->max_fdset > open_files) {
		int left = (newf->max_fdset-open_files)/8;
		int start = open_files / (8 * sizeof(unsigned long));

		memset(&newf->open_fds->fds_bits[start], 0, left);
		memset(&newf->close_on_exec->fds_bits[start], 0, left);
	}

	tsk->files = newf;
	error = 0;
out:
	return error;

out_release:
	free_fdset (newf->close_on_exec, newf->max_fdset);
	free_fdset (newf->open_fds, newf->max_fdset);
	kmem_cache_free(files_cachep, newf);
	goto out;
}
941
942
943
944
945
946
947
948int unshare_files(void)
949{
950 struct files_struct *files = current->files;
951 int rc;
952
953 if(!files)
954 BUG();
955
956
957
958 if(atomic_read(&files->count) == 1)
959 {
960 atomic_inc(&files->count);
961 return 0;
962 }
963 rc = copy_files(0, current);
964 if(rc)
965 current->files = files;
966 return rc;
967}
968
969EXPORT_SYMBOL(unshare_files);
970
971static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
972{
973 struct sighand_struct *sig;
974
975 if (clone_flags & (CLONE_SIGHAND | CLONE_THREAD)) {
976 atomic_inc(¤t->sighand->count);
977 return 0;
978 }
979 sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
980 tsk->sighand = sig;
981 if (!sig)
982 return -ENOMEM;
983 spin_lock_init(&sig->siglock);
984 atomic_set(&sig->count, 1);
985 memcpy(sig->action, current->sighand->action, sizeof(sig->action));
986 return 0;
987}
988
989static inline int copy_signal(unsigned long clone_flags, struct task_struct * tsk)
990{
991 struct signal_struct *sig;
992 int ret;
993
994 if (clone_flags & CLONE_THREAD) {
995 atomic_inc(¤t->signal->count);
996 atomic_inc(¤t->signal->live);
997 return 0;
998 }
999 sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
1000 tsk->signal = sig;
1001 if (!sig)
1002 return -ENOMEM;
1003
1004 ret = copy_thread_group_keys(tsk);
1005 if (ret < 0) {
1006 kmem_cache_free(signal_cachep, sig);
1007 return ret;
1008 }
1009
1010 atomic_set(&sig->count, 1);
1011 atomic_set(&sig->live, 1);
1012 sig->group_exit = 0;
1013 sig->group_exit_code = 0;
1014 sig->group_exit_task = NULL;
1015 sig->group_stop_count = 0;
1016 sig->stop_state = 0;
1017 sig->curr_target = NULL;
1018 init_sigpending(&sig->shared_pending);
1019 INIT_LIST_HEAD(&sig->posix_timers);
1020
1021 sig->tty = current->signal->tty;
1022 sig->pgrp = process_group(current);
1023 sig->session = current->signal->session;
1024 sig->leader = 0;
1025 sig->tty_old_pgrp = 0;
1026
1027 sig->utime = sig->stime = sig->cutime = sig->cstime = 0;
1028 sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
1029 sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
1030 sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
1031
1032 return 0;
1033}
1034
1035static inline void copy_flags(unsigned long clone_flags, struct task_struct *p)
1036{
1037 unsigned long new_flags = p->flags;
1038
1039 new_flags &= ~PF_SUPERPRIV;
1040 new_flags |= PF_FORKNOEXEC;
1041 if (!(clone_flags & CLONE_PTRACE))
1042 p->ptrace = 0;
1043 p->flags = new_flags;
1044}
1045
1046asmlinkage long sys_set_tid_address(int __user *tidptr)
1047{
1048 current->clear_child_tid = tidptr;
1049
1050 return current->pid;
1051}
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061static task_t *copy_process(unsigned long clone_flags,
1062 unsigned long stack_start,
1063 struct pt_regs *regs,
1064 unsigned long stack_size,
1065 int __user *parent_tidptr,
1066 int __user *child_tidptr,
1067 int pid)
1068{
1069 int retval;
1070 struct task_struct *p = NULL;
1071
1072 if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
1073 return ERR_PTR(-EINVAL);
1074
1075
1076
1077
1078
1079 if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
1080 return ERR_PTR(-EINVAL);
1081
1082
1083
1084
1085
1086
1087 if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
1088 return ERR_PTR(-EINVAL);
1089
1090 retval = security_task_create(clone_flags);
1091 if (retval)
1092 goto fork_out;
1093
1094 retval = -ENOMEM;
1095 p = dup_task_struct(current);
1096 if (!p)
1097 goto fork_out;
1098 p->tux_info = NULL;
1099
1100 retval = -EAGAIN;
1101 if (atomic_read(&p->user->processes) >=
1102 p->rlim[RLIMIT_NPROC].rlim_cur) {
1103 if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
1104 p->user != &root_user)
1105 goto bad_fork_free;
1106 }
1107
1108 atomic_inc(&p->user->__count);
1109 atomic_inc(&p->user->processes);
1110 get_group_info(p->group_info);
1111
1112
1113
1114
1115
1116
1117 if (nr_threads >= max_threads)
1118 goto bad_fork_cleanup_count;
1119
1120 if (!try_module_get(p->thread_info->exec_domain->module))
1121 goto bad_fork_cleanup_count;
1122
1123 if (p->binfmt && !try_module_get(p->binfmt->module))
1124 goto bad_fork_cleanup_put_domain;
1125
1126 p->did_exec = 0;
1127 copy_flags(clone_flags, p);
1128 p->pid = pid;
1129 retval = -EFAULT;
1130 if (clone_flags & CLONE_PARENT_SETTID)
1131 if (put_user(p->pid, parent_tidptr))
1132 goto bad_fork_cleanup;
1133
1134 p->proc_dentry = NULL;
1135
1136 INIT_LIST_HEAD(&p->children);
1137 INIT_LIST_HEAD(&p->sibling);
1138 init_waitqueue_head(&p->wait_chldexit);
1139 task_aux(p)->vfork_done = NULL;
1140 spin_lock_init(&p->alloc_lock);
1141 spin_lock_init(&p->proc_lock);
1142
1143 clear_tsk_thread_flag(p, TIF_SIGPENDING);
1144 init_sigpending(&p->pending);
1145
1146 task_io_accounting_init(p);
1147
1148 p->it_real_value = p->it_virt_value = p->it_prof_value = 0;
1149 p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0;
1150 init_timer(&p->real_timer);
1151 p->real_timer.data = (unsigned long) p;
1152
1153 p->utime = p->stime = 0;
1154 p->lock_depth = -1;
1155 do_posix_clock_monotonic_gettime(&p->start_time);
1156 p->security = NULL;
1157 p->io_context = NULL;
1158 p->io_wait = NULL;
1159 p->audit_context = NULL;
1160#ifdef CONFIG_NUMA
1161 p->mempolicy = mpol_copy(p->mempolicy);
1162 if (IS_ERR(p->mempolicy)) {
1163 retval = PTR_ERR(p->mempolicy);
1164 p->mempolicy = NULL;
1165 goto bad_fork_cleanup;
1166 }
1167#endif
1168
1169 if ((retval = security_task_alloc(p)))
1170 goto bad_fork_cleanup_policy;
1171 if ((retval = audit_alloc(p)))
1172 goto bad_fork_cleanup_security;
1173
1174 if ((retval = copy_semundo(clone_flags, p)))
1175 goto bad_fork_cleanup_audit;
1176 if ((retval = copy_files(clone_flags, p)))
1177 goto bad_fork_cleanup_semundo;
1178 if ((retval = copy_fs(clone_flags, p)))
1179 goto bad_fork_cleanup_files;
1180 if ((retval = copy_sighand(clone_flags, p)))
1181 goto bad_fork_cleanup_fs;
1182 if ((retval = copy_signal(clone_flags, p)))
1183 goto bad_fork_cleanup_sighand;
1184 if ((retval = copy_mm(clone_flags, p)))
1185 goto bad_fork_cleanup_signal;
1186 if ((retval = copy_keys(clone_flags, p)))
1187 goto bad_fork_cleanup_mm;
1188 if ((retval = copy_namespace(clone_flags, p)))
1189 goto bad_fork_cleanup_keys;
1190 retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
1191 if (retval)
1192 goto bad_fork_cleanup_namespace;
1193
1194 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
1195
1196
1197
1198 p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
1199
1200
1201
1202
1203
1204 clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
1205
1206
1207 p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL);
1208 p->pdeath_signal = 0;
1209 p->exit_state = 0;
1210
1211
1212 sched_fork(p);
1213
1214
1215
1216
1217
1218 p->tgid = p->pid;
1219 p->group_leader = p;
1220 INIT_LIST_HEAD(&p->ptrace_children);
1221 INIT_LIST_HEAD(&p->ptrace_list);
1222
1223
1224 write_lock_irq(&tasklist_lock);
1225
1226
1227
1228
1229
1230
1231
1232
1233 p->cpus_allowed = current->cpus_allowed;
1234 set_task_cpu(p, smp_processor_id());
1235
1236
1237
1238
1239
1240 if (sigismember(¤t->pending.signal, SIGKILL)) {
1241 write_unlock_irq(&tasklist_lock);
1242 retval = -EINTR;
1243 goto bad_fork_cleanup_namespace;
1244 }
1245
1246
1247 if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
1248 p->real_parent = current->real_parent;
1249 p->parent_exec_id = current->parent_exec_id;
1250 } else {
1251 p->real_parent = current;
1252 p->parent_exec_id = current->self_exec_id;
1253 }
1254 p->parent = p->real_parent;
1255
1256 spin_lock(¤t->sighand->siglock);
1257 if (clone_flags & CLONE_THREAD) {
1258
1259
1260
1261
1262
1263 if (current->signal->group_exit) {
1264 spin_unlock(¤t->sighand->siglock);
1265 write_unlock_irq(&tasklist_lock);
1266 retval = -EAGAIN;
1267 goto bad_fork_cleanup_namespace;
1268 }
1269 p->tgid = current->tgid;
1270 p->group_leader = current->group_leader;
1271
1272 if (current->signal->group_stop_count > 0) {
1273
1274
1275
1276
1277
1278 current->signal->group_stop_count++;
1279 set_tsk_thread_flag(p, TIF_SIGPENDING);
1280 }
1281 }
1282
1283 SET_LINKS(p);
1284 if (unlikely(p->ptrace & PT_PTRACED))
1285 __ptrace_link(p, current->parent);
1286
1287 if (thread_group_leader(p)) {
1288 attach_pid(p, PIDTYPE_PGID, process_group(p));
1289 attach_pid(p, PIDTYPE_SID, p->signal->session);
1290 if (p->pid)
1291 __get_cpu_var(process_counts)++;
1292 }
1293 attach_pid(p, PIDTYPE_TGID, p->tgid);
1294 attach_pid(p, PIDTYPE_PID, p->pid);
1295
1296 if (!current->signal->tty && p->signal->tty)
1297 p->signal->tty = NULL;
1298
1299 nr_threads++;
1300 spin_unlock(¤t->sighand->siglock);
1301 write_unlock_irq(&tasklist_lock);
1302 retval = 0;
1303
1304fork_out:
1305 if (retval)
1306 return ERR_PTR(retval);
1307 return p;
1308
1309bad_fork_cleanup_namespace:
1310 exit_namespace(p);
1311bad_fork_cleanup_keys:
1312 exit_keys(p);
1313bad_fork_cleanup_mm:
1314 if (p->mm)
1315 mmput(p->mm);
1316bad_fork_cleanup_signal:
1317 exit_signal(p);
1318bad_fork_cleanup_sighand:
1319 exit_sighand(p);
1320bad_fork_cleanup_fs:
1321 exit_fs(p);
1322bad_fork_cleanup_files:
1323 exit_files(p);
1324bad_fork_cleanup_semundo:
1325 exit_sem(p);
1326bad_fork_cleanup_audit:
1327 audit_free(p);
1328bad_fork_cleanup_security:
1329 security_task_free(p);
1330bad_fork_cleanup_policy:
1331#ifdef CONFIG_NUMA
1332 mpol_free(p->mempolicy);
1333#endif
1334bad_fork_cleanup:
1335 if (p->binfmt)
1336 module_put(p->binfmt->module);
1337bad_fork_cleanup_put_domain:
1338 module_put(p->thread_info->exec_domain->module);
1339bad_fork_cleanup_count:
1340 put_group_info(p->group_info);
1341 atomic_dec(&p->user->processes);
1342 free_uid(p->user);
1343bad_fork_free:
1344 free_task(p);
1345 goto fork_out;
1346}
1347
1348struct pt_regs * __devinit __attribute__((weak)) idle_regs(struct pt_regs *regs)
1349{
1350 memset(regs, 0, sizeof(struct pt_regs));
1351 return regs;
1352}
1353
1354task_t * __devinit fork_idle(int cpu)
1355{
1356 task_t *task;
1357 struct pt_regs regs;
1358
1359 task = copy_process(CLONE_VM, 0, idle_regs(®s), 0, NULL, NULL, 0);
1360 if (!task)
1361 return ERR_PTR(-ENOMEM);
1362 init_idle(task, cpu);
1363 unhash_process(task);
1364 return task;
1365}
1366
1367static inline int fork_traceflag (unsigned clone_flags)
1368{
1369 if (clone_flags & CLONE_UNTRACED)
1370 return 0;
1371 else if (clone_flags & CLONE_VFORK) {
1372 if (current->ptrace & PT_TRACE_VFORK)
1373 return PTRACE_EVENT_VFORK;
1374 } else if ((clone_flags & CSIGNAL) != SIGCHLD) {
1375 if (current->ptrace & PT_TRACE_CLONE)
1376 return PTRACE_EVENT_CLONE;
1377 } else if (current->ptrace & PT_TRACE_FORK)
1378 return PTRACE_EVENT_FORK;
1379
1380 return 0;
1381}
1382
1383
1384
1385
1386
1387
1388
/*
 * Main fork entry point: allocate a pid, create the child with
 * copy_process() and start it running (or leave it stopped).
 *
 * The arguments are handed to copy_process() unchanged, except that
 * CLONE_PTRACE is added when the tracer of the current task asked for
 * the matching fork event.
 *
 * Returns the child's pid on success, -EAGAIN if no pid could be
 * allocated, or the negative errno from copy_process().
 */
long do_fork(unsigned long clone_flags,
	      unsigned long stack_start,
	      struct pt_regs *regs,
	      unsigned long stack_size,
	      int __user *parent_tidptr,
	      int __user *child_tidptr)
{
	struct task_struct *p;
	int trace = 0;
	long pid = alloc_pidmap();

	if (pid < 0)
		return -EAGAIN;
	/* Only consult the trace options when the caller is being traced. */
	if (unlikely(current->ptrace)) {
		trace = fork_traceflag (clone_flags);
		if (trace)
			clone_flags |= CLONE_PTRACE;
	}

	p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid);
	/*
	 * Everything that needs the child is done inside this branch,
	 * before the function returns.
	 */
	if (!IS_ERR(p)) {
		struct completion vfork;

		/* Must be in place before the child is allowed to run. */
		if (clone_flags & CLONE_VFORK) {
			task_aux(p)->vfork_done = &vfork;
			init_completion(&vfork);
		}

		if ((p->ptrace & PT_PTRACED) || (clone_flags & CLONE_STOPPED)) {
			/*
			 * Queue a SIGSTOP so that the child stops as soon as
			 * it handles its pending signals.
			 */
			sigaddset(&p->pending.signal, SIGSTOP);
			set_tsk_thread_flag(p, TIF_SIGPENDING);
		}

		if (!(clone_flags & CLONE_STOPPED))
			wake_up_new_task(p, clone_flags);
		else
			p->state = TASK_STOPPED;
		++total_forks;

		/* Report the fork event, with the child's pid, to the tracer. */
		if (unlikely (trace)) {
			current->ptrace_message = pid;
			ptrace_notify ((trace << 8) | SIGTRAP);
		}

		/* vfork(): block until mm_release() completes the completion. */
		if (clone_flags & CLONE_VFORK) {
			wait_for_completion(&vfork);
			if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE))
				ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
		}
	} else {
		/* Creation failed: release the pid and return the errno. */
		free_pidmap(pid);
		pid = PTR_ERR(p);
	}
	return pid;
}
1451
1452
/* Slab cache for struct signal_struct; created in proc_caches_init(). */
kmem_cache_t *signal_cachep;

/* Slab cache for struct sighand_struct; created in proc_caches_init(). */
kmem_cache_t *sighand_cachep;

/* Slab cache for struct files_struct; created in proc_caches_init(). */
kmem_cache_t *files_cachep;

/* Slab cache for struct fs_struct; created in proc_caches_init(). */
kmem_cache_t *fs_cachep;

/* Slab cache for struct vm_area_struct; created in proc_caches_init(). */
kmem_cache_t *vm_area_cachep;

/* Slab cache for struct mm_struct; created in proc_caches_init(). */
kmem_cache_t *mm_cachep;
1469
1470void __init proc_caches_init(void)
1471{
1472 sighand_cachep = kmem_cache_create("sighand_cache",
1473 sizeof(struct sighand_struct), 0,
1474 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
1475 signal_cachep = kmem_cache_create("signal_cache",
1476 sizeof(struct signal_struct), 0,
1477 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
1478 files_cachep = kmem_cache_create("files_cache",
1479 sizeof(struct files_struct), 0,
1480 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
1481 fs_cachep = kmem_cache_create("fs_cache",
1482 sizeof(struct fs_struct), 0,
1483 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
1484 vm_area_cachep = kmem_cache_create("vm_area_struct",
1485 sizeof(struct vm_area_struct), 0,
1486 SLAB_PANIC, NULL, NULL);
1487 mm_cachep = kmem_cache_create("mm_struct",
1488 sizeof(struct mm_struct), 0,
1489 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
1490}
1491