出处:http://ericxiao.cublog.cn/ ------------------------------------------ 在linux内核中,用linux_binfmt结构来表示每一个加载模块.它的定义如下: struct linux_binfmt { //用来构成链表 struct list_head lh; //所属的module struct module *module; //加载可执行文件 int (*load_binary)(struct linux_binprm *, struct pt_regs * regs); //加载共享库 int (*load_shlib)(struct file *); int (*core_dump)(long signr, struct pt_regs *regs, struct file *file, unsigned long limit); unsigned long min_coredump; /* minimal dump size */ int hasvdso; }; 结构中的lh将各个linux_binfmt组成一个链表,这个链表的表头是formats. 为了说明,我们来看一下如何注册一个可执行文件的加载模块. int register_binfmt(struct linux_binfmt * fmt) { if (!fmt) return -EINVAL; write_lock(&binfmt_lock); //将其添加至链表 list_add(&fmt->lh, &formats); write_unlock(&binfmt_lock); return 0; } 所以,在加载可执行文件的时候,只要遍历formats这个链表,然后依次用各个module尝试加载这个可执行文件.这正是search_binary_handler()所做的.代码如下: int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) { int try,retval; struct linux_binfmt *fmt; #ifdef __alpha__ /* handle /sbin/loader.. */ { struct exec * eh = (struct exec *) bprm->buf; if (!bprm->loader && eh->fh.f_magic == 0x183 && (eh->fh.f_flags & 0x3000) == 0x3000) { struct file * file; unsigned long loader; allow_write_access(bprm->file); fput(bprm->file); bprm->file = NULL; loader = bprm->vma->vm_end - sizeof(void *); file = open_exec("/sbin/loader"); retval = PTR_ERR(file); if (IS_ERR(file)) return retval; /* Remember if the application is TASO. */ bprm->sh_bang = eh->ah.entry < 0x100000000UL; bprm->file = file; bprm->loader = loader; retval = prepare_binprm(bprm); if (retval<0) return retval; /* should call search_binary_handler recursively here, but it does not matter */ } } #endif retval = security_bprm_check(bprm); if (retval) return retval; /* kernel module loader fixup */ /* so we don't try to load run modprobe in kernel space. 
*/ set_fs(USER_DS); retval = audit_bprm(bprm); if (retval) return retval; retval = -ENOENT; //这里会循环两次.待模块加载之后再遍历一次 for (try=0; try<2; try++) { read_lock(&binfmt_lock); list_for_each_entry(fmt, &formats, lh) { //加载函数 int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary; if (!fn) continue; if (!try_module_get(fmt->module)) continue; read_unlock(&binfmt_lock); //运行加载函数,如果加载未成功,则继续遍历 retval = fn(bprm, regs); //加载成功了 if (retval >= 0) { put_binfmt(fmt); allow_write_access(bprm->file); if (bprm->file) fput(bprm->file); bprm->file = NULL; current->did_exec = 1; proc_exec_connector(current); return retval; } read_lock(&binfmt_lock); put_binfmt(fmt); if (retval != -ENOEXEC || bprm->mm == NULL) break; if (!bprm->file) { read_unlock(&binfmt_lock); return retval; } } read_unlock(&binfmt_lock); //所有模块加载这个可执行文件失败,则加载其它模块再试一次 if (retval != -ENOEXEC || bprm->mm == NULL) { break; //CONFIG_KMOD:动态加载模块标志 #ifdef CONFIG_KMOD }else{ #define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e)) if (printable(bprm->buf[0]) && printable(bprm->buf[1]) && printable(bprm->buf[2]) && printable(bprm->buf[3])) break; /* -ENOEXEC */ request_module("binfmt-%04x", *(unsigned short *)(&bprm->buf[2])); #endif } } return retval; } 到这里,我们看到了可执行文件的加载过程,接下来,我们以a.out型的可执行文件的加载过程为例.来看一看linux怎么处理可执行文件的. 
四:a.out文件格式的加载 a.out模块的处理是在binfmt_aout.c中.如下所示: static struct linux_binfmt aout_format = { .module = THIS_MODULE, .load_binary = load_aout_binary, .load_shlib = load_aout_library, .core_dump = aout_core_dump, .min_coredump = PAGE_SIZE }; 对应的加载接口为load_aout_binary().代码如下: static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) { struct exec ex; unsigned long error; unsigned long fd_offset; unsigned long rlim; int retval; //文件头信息匹配 ex = *((struct exec *) bprm->buf); /* exec-header */ if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC && N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) || N_TRSIZE(ex) || N_DRSIZE(ex) || i_size_read(bprm->file->f_path.dentry->d_inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { return -ENOEXEC; } /* * Requires a mmap handler. This prevents people from using a.out * as part of an exploit attack against /proc-related vulnerabilities. */ //如果文件没有f_op操作集,或者不支持MMAP,则无效 if (!bprm->file->f_op || !bprm->file->f_op->mmap) return -ENOEXEC; //可执行文件正文的起始位置 //每种类型的正文起始位置 fd_offset = N_TXTOFF(ex); /* Check initial limits. This avoids letting people circumvent * size limits imposed on them by creating programs with large * arrays in the data or bss. 
*/ //判断data bss是否超过了限制 rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur; if (rlim >= RLIM_INFINITY) rlim = ~0; if (ex.a_data + ex.a_bss > rlim) return -ENOMEM; /* Flush all traces of the currently running executable */ //已经取得了足够的信息,是跟当前进程脱离的时候了 retval = flush_old_exec(bprm); if (retval) return retval; /* OK, This is the point of no return */ #if defined(__alpha__) SET_AOUT_PERSONALITY(bprm, ex); #elif defined(__sparc__) set_personality(PER_SUNOS); #if !defined(__sparc_v9__) memcpy(&current->thread.core_exec, &ex, sizeof(struct exec)); #endif #else //设置进程的个性标志 set_personality(PER_LINUX); #endif //设置进程的代码段的起始与终止位置 current->mm->end_code = ex.a_text + (current->mm->start_code = N_TXTADDR(ex)); //设置进程数据段的起始与终止位置 current->mm->end_data = ex.a_data + (current->mm->start_data = N_DATADDR(ex)); //设置进程BSS区间 current->mm->brk = ex.a_bss + (current->mm->start_brk = N_BSSADDR(ex)); current->mm->free_area_cache = current->mm->mmap_base; current->mm->cached_hole_size = 0; compute_creds(bprm); //进程fork之后已经执行了exec,不再需要PF_FORKNOEXEC current->flags &= ~PF_FORKNOEXEC; #ifdef __sparc__ if (N_MAGIC(ex) == NMAGIC) { loff_t pos = fd_offset; /* Fuck me plenty... 
*/ /* */ down_write(&current->mm->mmap_sem); error = do_brk(N_TXTADDR(ex), ex.a_text); up_write(&current->mm->mmap_sem); bprm->file->f_op->read(bprm->file, (char *) N_TXTADDR(ex), ex.a_text, &pos); down_write(&current->mm->mmap_sem); error = do_brk(N_DATADDR(ex), ex.a_data); up_write(&current->mm->mmap_sem); bprm->file->f_op->read(bprm->file, (char *) N_DATADDR(ex), ex.a_data, &pos); goto beyond_if; } #endif //如果是OMAGIC格式 if (N_MAGIC(ex) == OMAGIC) { unsigned long text_addr, map_size; loff_t pos; text_addr = N_TXTADDR(ex); #if defined(__alpha__) || defined(__sparc__) pos = fd_offset; map_size = ex.a_text+ex.a_data + PAGE_SIZE - 1; #else pos = 32; map_size = ex.a_text+ex.a_data; #endif down_write(&current->mm->mmap_sem); //为进程的代码段分配空间 error = do_brk(text_addr & PAGE_MASK, map_size); up_write(&current->mm->mmap_sem); if (error != (text_addr & PAGE_MASK)) { send_sig(SIGKILL, current, 0); return error; } //将文件数据读入代码段 error = bprm->file->f_op->read(bprm->file, (char __user *)text_addr, ex.a_text+ex.a_data, &pos); if ((signed long)error < 0) { send_sig(SIGKILL, current, 0); return error; } //x86上为一空函数 flush_icache_range(text_addr, text_addr+ex.a_text+ex.a_data); } else { static unsigned long error_time, error_time2; //数据段,代码段是否页框对齐 if ((ex.a_text & 0xfff || ex.a_data & 0xfff) && (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ) { printk(KERN_NOTICE "executable not page aligned\n"); error_time2 = jiffies; } // if ((fd_offset & ~PAGE_MASK) != 0 && (jiffies-error_time) > 5*HZ) { printk(KERN_WARNING "fd_offset is not page aligned. 
Please convert program: %s\n", bprm->file->f_path.dentry->d_name.name); error_time = jiffies; } if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) { //不支持mmap,或者正文偏移不是页对齐的 loff_t pos = fd_offset; down_write(&current->mm->mmap_sem); //分配段空间 do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data); up_write(&current->mm->mmap_sem); //从文件中读入相关数据 bprm->file->f_op->read(bprm->file, (char __user *)N_TXTADDR(ex), ex.a_text+ex.a_data, &pos); flush_icache_range((unsigned long) N_TXTADDR(ex), (unsigned long) N_TXTADDR(ex) + ex.a_text+ex.a_data); goto beyond_if; } //如果支持MMAP。直接将文件映射到内存即可 down_write(&current->mm->mmap_sem); error = do_mmap(bprm->file, N_TXTADDR(ex), ex.a_text, PROT_READ | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE, fd_offset); up_write(&current->mm->mmap_sem); if (error != N_TXTADDR(ex)) { send_sig(SIGKILL, current, 0); return error; } down_write(&current->mm->mmap_sem); error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE, fd_offset + ex.a_text); up_write(&current->mm->mmap_sem); if (error != N_DATADDR(ex)) { send_sig(SIGKILL, current, 0); return error; } } beyond_if: //设置进程的binfmt set_binfmt(&aout_format); //为BSS段分配空间 retval = set_brk(current->mm->start_brk, current->mm->brk); if (retval < 0) { //分配失败,发送SIGKILL信号,杀掉当前进程 send_sig(SIGKILL, current, 0); return retval; } //扩大进程的栈 retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT); if (retval < 0) { /* Someone check-me: is this error path enough? 
*/ send_sig(SIGKILL, current, 0); return retval; } //调整栈空间的布局 current->mm->start_stack = (unsigned long) create_aout_tables((char __user *) bprm->p, bprm); #ifdef __alpha__ regs->gp = ex.a_gpvalue; #endif //设置新的EIP与ESP.使其返回到用户空间后,可以开始运行这个程序 start_thread(regs, ex.a_entry, current->mm->start_stack); if (unlikely(current->ptrace & PT_PTRACED)) { if (current->ptrace & PT_TRACE_EXEC) ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP); else send_sig(SIGTRAP, current, 0); } return 0; } 首先判断文件的文件头信息,检查是否属于a.out文件.如果不属于a.out则出错退出(返回-ENOEXEC),让其它module进行选择. 因为execve()系统调用会完全代替进程,因此,在运行该进程之前,先解除父子进程的共享关系,这是由flush_old_exec()完成的.代码如下: int flush_old_exec(struct linux_binprm * bprm) { char * name; int i, ch, retval; struct files_struct *files; char tcomm[sizeof(current->comm)]; //如果父子进程共享信号处理,脱离其共享关系 retval = de_thread(current); if (retval) goto out; //复制共享的文件 files = current->files; /* refcounted so safe to hold */ retval = unshare_files(); if (retval) goto out; //进程的用户空间有可能是父进程的复制品.使之独立 //使进程的mm切换为bprm->mm //这就是我们之前千辛万苦初始化bprm->mm的原因 retval = exec_mmap(bprm->mm); if (retval) goto mmap_failed; bprm->mm = NULL; /* We're using it now */ put_files_struct(files); current->sas_ss_sp = current->sas_ss_size = 0; if (current->euid == current->uid && current->egid == current->gid) set_dumpable(current->mm, 1); else set_dumpable(current->mm, suid_dumpable); name = bprm->filename; /* Copies the binary name from after last slash */ //取可执行文件的名字 for (i=0; (ch = *(name++)) != '\0';) { if (ch == '/') i = 0; /* overwrite what we wrote */ else if (i < (sizeof(tcomm) - 1)) tcomm[i++] = ch; } tcomm[i] = '\0'; //task->comm:保存可执行文件名 set_task_comm(current, tcomm); current->flags &= ~PF_RANDOMIZE; //flush_thread:只与协处理器和DEBUG有关 flush_thread(); current->mm->task_size = TASK_SIZE; if (bprm->e_uid != current->euid || bprm->e_gid != current->egid) { suid_keys(current); set_dumpable(current->mm, suid_dumpable); current->pdeath_signal = 0; } else if (file_permission(bprm->file, MAY_READ) || (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) { suid_keys(current); 
set_dumpable(current->mm, suid_dumpable); } /* An exec changes our domain. We are no longer part of the thread group */ current->self_exec_id++; //因为解除了跟父进程的共享关系,所以 //将信号处理函数改为默认的操作 flush_signal_handlers(current, 0); //关闭设置了close-on-exec标志的已打开文件 flush_old_files(current->files); return 0; mmap_failed: reset_files_struct(current, files); out: return retval; } 我们重点分析一下exec_mmap(): static int exec_mmap(struct mm_struct *mm) { struct task_struct *tsk; struct mm_struct * old_mm, *active_mm; tsk = current; old_mm = current->mm; mm_release(tsk, old_mm); if (old_mm) { down_read(&old_mm->mmap_sem); if (unlikely(old_mm->core_waiters)) { up_read(&old_mm->mmap_sem); return -EINTR; } } task_lock(tsk); active_mm = tsk->active_mm; tsk->mm = mm; tsk->active_mm = mm; //切换进程的执行空间.这个过程我们在进程切换跟调度的时候再来做详细的分析 activate_mm(active_mm, mm); task_unlock(tsk); arch_pick_mmap_layout(mm); // 减少old_mm,active_mm的引用计数,如果引用计数为零,则释放其所占 //空间,或者断开映射 if (old_mm) { up_read(&old_mm->mmap_sem); BUG_ON(active_mm != old_mm); mmput(old_mm); return 0; } mmdrop(active_mm); return 0; } 值得注意的是mm_release()中有一个重要的操作: void mm_release(struct task_struct *tsk, struct mm_struct *mm) { struct completion *vfork_done = tsk->vfork_done; /* Get rid of any cached register state */ deactivate_mm(tsk, mm); /* notify parent sleeping on vfork() */ //如果创建子进程的时候带了CLONE_VFORK,此时子进程已经不再使用父进程的地址空间 //是该唤醒父进程了 if (vfork_done) { tsk->vfork_done = NULL; complete(vfork_done); } /* * If we're exiting normally, clear a user-space tid field if * requested. We leave this alone when dying by signal, to leave * the value intact in a core dump, and to save the unnecessary * trouble otherwise. Userland only wants this done for a sys_exit. */ if (tsk->clear_child_tid && !(tsk->flags & PF_SIGNALED) && atomic_read(&mm->mm_users) > 1) { u32 __user * tidptr = tsk->clear_child_tid; tsk->clear_child_tid = NULL; /* * We don't check the error code - if userspace has * not set up a proper pointer then tough luck. 
*/ put_user(0, tidptr); sys_futex(tidptr, FUTEX_WAKE, 1, NULL, NULL, 0); } } 还记得我们之前讨论过的CLONE_VFORK标志吗?到这里就可以唤醒父进程了.因为此时子进程结束了对父进程空间的共享. 与父进程脱离关系之后,子进程就拥有了自己独立的资源.然后加载数据段和代码段.分配BSS段空间.把栈空间也调整到适当大小. 之后我们遇到的另一个重点是栈空间的布局.我们来分析这一个过程. static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) { …… …… current->mm->start_stack = (unsigned long) create_aout_tables((char __user *) bprm->p, bprm); #ifdef __alpha__ regs->gp = ex.a_gpvalue; #endif start_thread(regs, ex.a_entry, current->mm->start_stack); …… } create_aout_tables()代码如下: static unsigned long __user *create_aout_tables(char __user *p, struct linux_binprm * bprm) { char __user * __user *argv; char __user * __user *envp; unsigned long __user *sp; //可执行文件的参数个数 int argc = bprm->argc; //环境变量的个数 int envc = bprm->envc; //sp初始化成按指针大小向下对齐后的p,也即bprm->p //对应下面图的初始化状态(1) sp = (void __user *)((-(unsigned long)sizeof(char *)) & (unsigned long) p); #ifdef __sparc__ /* This imposes the proper stack alignment for a new process. */ sp = (void __user *) (((unsigned long) sp) & ~7); if ((envc+argc+3)&1) --sp; #endif #ifdef __alpha__ /* whee.. test-programs are so much fun. 
*/ put_user(0, --sp); put_user(0, --sp); if (bprm->loader) { put_user(0, --sp); put_user(0x3eb, --sp); put_user(bprm->loader, --sp); put_user(0x3ea, --sp); } put_user(bprm->exec, --sp); put_user(0x3e9, --sp); #endif sp -= envc+1; envp = (char __user * __user *) sp; sp -= argc+1; argv = (char __user * __user *) sp; #if defined(__i386__) || defined(__mc68000__) || defined(__arm__) || defined(__arch_um__) put_user((unsigned long) envp,--sp); put_user((unsigned long) argv,--sp); #endif put_user(argc,--sp); //对应下面分析图中的(2) current->mm->arg_start = (unsigned long) p; while (argc-->0) { char c; put_user(p,argv++); do { get_user(c,p++); } while (c); } put_user(NULL,argv); current->mm->arg_end = current->mm->env_start = (unsigned long) p; while (envc-->0) { char c; put_user(p,envp++); do { get_user(c,p++); } while (c); } put_user(NULL,envp); current->mm->env_end = (unsigned long) p; //对应分析图中的(3) return sp; } 我们用图来表示上面的操作过程: [图: 080411170951.jpg] [图: 080411171026.jpg]
对照上面的分析图就很容易看懂代码了. 最后,设置eip的值为可执行文件的入口地址(ex.a_entry),esp为当前栈指针位置(current->mm->start_stack),返回到用户空间就可以顺利的执行了.这一过程是start_thread()完成的.这个函数比较简单,就不分析了. |
|小黑屋|最新主题|手机版|微赢网络技术论坛 ( 苏ICP备08020429号 )
GMT+8, 2024-9-30 09:36 , Processed in 0.203411 second(s), 12 queries , Gzip On, MemCache On.
Powered by Discuz! X3.5
© 2001-2023 Discuz! Team.