And thus calling "exec" recursively - does not recurse....
/*
* sys_execve() executes a new program.
*/
int do_execve(char * filename,
char __user *__user *argv,
char __user *__user *envp,
struct pt_regs * regs)
{
struct linux_binprm *bprm;
struct file *file;
struct files_struct *displaced;
int retval;
retval = unshare_files(&displaced);
if (retval)
goto out_ret;
retval = -ENOMEM;
bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
if (!bprm)
goto out_files;
file = open_exec(filename);
retval = PTR_ERR(file);
if (IS_ERR(file))
goto out_kfree;
sched_exec();
bprm->file = file;
bprm->filename = filename;
bprm->interp = filename;
retval = bprm_mm_init(bprm);
if (retval)
goto out_file;
bprm->argc = count(argv, MAX_ARG_STRINGS);
if ((retval = bprm->argc) < 0)
goto out_mm;
bprm->envc = count(envp, MAX_ARG_STRINGS);
if ((retval = bprm->envc) < 0)
goto out_mm;
retval = security_bprm_alloc(bprm);
if (retval)
goto out;
retval = prepare_binprm(bprm);
if (retval < 0)
goto out;
retval = copy_strings_kernel(1, &bprm->filename, bprm);
if (retval < 0)
goto out;
bprm->exec = bprm->p;
retval = copy_strings(bprm->envc, envp, bprm);
if (retval < 0)
goto out;
retval = copy_strings(bprm->argc, argv, bprm);
if (retval < 0)
goto out;
current->flags &= ~PF_KTHREAD;
retval = search_binary_handler(bprm,regs);
if (retval >= 0) {
/* execve success */
security_bprm_free(bprm);
acct_update_integrals(current);
free_bprm(bprm);
if (displaced)
put_files_struct(displaced);
return retval;
}
out:
if (bprm->security)
security_bprm_free(bprm);
out_mm:
if (bprm->mm)
mmput (bprm->mm);
out_file:
if (bprm->file) {
allow_write_access(bprm->file);
fput(bprm->file);
}
out_kfree:
free_bprm(bprm);
out_files:
if (displaced)
reset_files_struct(displaced);
out_ret:
return retval;
}
and CR3 loading is done in switch_mm():
static inline void switch_mm(struct mm_struct *prev,
struct mm_struct *next,
struct task_struct *tsk)
{
int cpu = smp_processor_id();
if (likely(prev != next)) {
/* stop flush ipis for the previous mm */
cpu_clear(cpu, prev->cpu_vm_mask);
#ifdef CONFIG_SMP
per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_OK;
per_cpu(cpu_tlbstate, cpu).active_mm = next;
#endif
cpu_set(cpu, next->cpu_vm_mask);
/* Re-load page tables */
load_cr3(next->pgd);
/*
* load the LDT, if the LDT is different:
*/
if (unlikely(prev->context.ldt != next->context.ldt))
load_LDT_nolock(&next->context);
}
#ifdef CONFIG_SMP
else {
per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_OK;
BUG_ON(per_cpu(cpu_tlbstate, cpu).active_mm != next);
if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
/* We were in lazy tlb mode and leave_mm disabled
* tlb flush IPI delivery. We must reload %cr3.
*/
load_cr3(next->pgd);
load_LDT_nolock(&next->context);
}
}
#endif
}
#define deactivate_mm(tsk, mm) \
asm("movl %0,%%gs": :"r" (0));
#endif
=========================================
/*
*
* The flush IPI assumes that a thread switch happens in this order:
* [cpu0: the cpu that switches]
* 1) switch_mm() either 1a) or 1b)
* 1a) thread switch to a different mm
* 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
* Stop ipi delivery for the old mm. This is not synchronized with
* the other cpus, but smp_invalidate_interrupt ignore flush ipis
* for the wrong mm, and in the worst case we perform a superfluous
* tlb flush.
* 1a2) set cpu_tlbstate to TLBSTATE_OK
* Now the smp_invalidate_interrupt won't call leave_mm if cpu0
* was in lazy tlb mode.
* 1a3) update cpu_tlbstate[].active_mm
* Now cpu0 accepts tlb flushes for the new mm.
* 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
* Now the other cpus will send tlb flush ipis.
* 1a4) change cr3.
* 1b) thread switch without mm change
* cpu_tlbstate[].active_mm is correct, cpu0 already handles
* flush ipis.
* 1b1) set cpu_tlbstate to TLBSTATE_OK
* 1b2) test_and_set the cpu bit in cpu_vm_mask.
* Atomically set the bit [other cpus will start sending flush ipis],
* and test the bit.
* 1b3) if the bit was 0: leave_mm was called, flush the tlb.
* 2) switch %%esp, ie current
*
* The interrupt must handle 2 special cases:
* - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
* - the cpu performs speculative tlb reads, i.e. even if the cpu only
* runs in kernel space, the cpu could load tlb entries for user space
* pages.
*
* The good news is that cpu_tlbstate is local to each cpu, no
* write/read ordering problems.
*/
/*
* TLB flush IPI:
*
* 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
* 2) Leave the mm if we are in the lazy tlb mode.
*/
=========================================
int flush_old_exec(struct linux_binprm * bprm)
{
char * name;
int i, ch, retval;
char tcomm[sizeof(current->comm)];
/*
* Make sure we have a private signal table and that
* we are unassociated from the previous thread group.
*/
retval = de_thread(current);
if (retval)
goto out;
set_mm_exe_file(bprm->mm, bprm->file);
/*
* Release all of the old mmap stuff
*/
retval = exec_mmap(bprm->mm);===============>which call
activate_mm(), which load CR3.
if (retval)
goto out;
bprm->mm = NULL; /* We're using it now */
/* This is the point of no return */
current->sas_ss_sp = current->sas_ss_size = 0;
if (current->euid == current->uid && current->egid == current->gid)
set_dumpable(current->mm, 1);
else
set_dumpable(current->mm, suid_dumpable);
name = bprm->filename;
/* Copies the binary name from after last slash */
for (i=0; (ch = *(name++)) != '\0';) {
if (ch == '/')
i = 0; /* overwrite what we wrote */
else
if (i < (sizeof(tcomm) - 1))
tcomm[i++] = ch;
}
tcomm[i] = '\0';
set_task_comm(current, tcomm);
current->flags &= ~PF_RANDOMIZE;
flush_thread();
/* Set the new mm task size. We have to do that late because it may
* depend on TIF_32BIT which is only updated in flush_thread() on
=========================================
static int exec_mmap(struct mm_struct *mm)
{
struct task_struct *tsk;
struct mm_struct * old_mm, *active_mm;
/* Notify parent that we're no longer interested in the old VM */
tsk = current;
old_mm = current->mm;
mm_release(tsk, old_mm);
if (old_mm) {
/*
* Make sure that if there is a core dump in progress
* for the old mm, we get out and die instead of going
* through with the exec. We must hold mmap_sem around
* checking core_state and changing tsk->mm.
*/
down_read(&old_mm->mmap_sem);
if (unlikely(old_mm->core_state)) {
up_read(&old_mm->mmap_sem);
return -EINTR;
}
}
task_lock(tsk);
active_mm = tsk->active_mm;
tsk->mm = mm;
tsk->active_mm = mm;
activate_mm(active_mm, mm);
task_unlock(tsk);
mm_update_next_owner(old_mm);
arch_pick_mmap_layout(mm);
if (old_mm) {