进程

进程相关概念解释

程序与进程的区别

程序: 死的，存在硬盘上，只占用磁盘空间。 — 剧本
进程：活动。运行在内存中的程序。占用内存、cpu等资源。 — 戏

虚拟内存与物理内存的映射关系

PCB：进程控制块
MMU：内存管理单元，在CPU内部

<img src="./DeepinScreenshot_select-area_20200506155851.png" style="zoom:100%;" />

进入到系统调用实际上就是靠的MMU进行权级切换

PCB 进程控制块

每个进程在内核中都有一个进程控制块（PCB）来维护进程相关信息，linux内核的进程块是task_struct结构体

/usr/src/linux-headers-xx.xx.x-x/include/linux/sched.h 文件中可以查看 struct task_struct 结构体定义

其内部成员有很多，重点掌握以下部分即可：

进程ID：系统中每个进程有唯一的id，在c语言中用pid_t类型表示，其实就是一个非负整数
进程的状态：有初始、就绪、运行、阻塞、挂起、停止等状态
- 其中初始态为进程准备阶段，常与就绪态结合来看
进程切换时需要保存和恢复的一些CPU寄存器
描述虚拟地址空间的信息
描述控制终端的信息
当前工作目录
umask掩码
文件描述符表：包含很多指向file结构体的指针
和信号相关的信息
用户id和组id
会话（Session）和进程组
进程可以使用的资源上限

进程组和会话

进程组(别名：作业)

多个进程的集合，每个进程都属于一个进程组，简化对多个进程的管理，waitpid函数和kill函数的参数中用-pid来表示一个进程组。
父进程创建子进程的时候默认父子进程属于同一进程组。进程组的ID==第一个进程ID（组长进程）, 故组长进程标识PGID==PID
只要有一个进程存在，进程组就存在，生存期与组长进程是否终止无关
kill -SIGKILL -进程组ID(负数) 杀掉整个进程组
进程组生存期:进程组创建到最后一个进程离开(终止或转移到另一个进程组)
一个进程可以为自己或子进程设置进程组id

会话

多个进程组的集合
创建会话的注意事项:

调用进程不能是进程组组长,该进程变成新会话首进程(session leader)
该进程成为一个新进程组的组长进程。
新会话丢弃原有的控制终端,该会话没有控制终端
该调用进程是组长进程,则出错返回
建立新会话时,先调用 fork, 父进程终止,子进程调用 setsid()
部分linux需要root权限才能创建

守护进程

Daemon进程，是linux中的后台服务程序，通常独立于控制终端并且周期性地执行某种任务或等待处理
某些发生的事件。一般采用以 d 结尾的名字。
Linux 后台的一些系统服务进程,没有控制终端,不能直接和用户交互。不受用户登录、注销的影响,一直在
运行着,他们都是守护进程。如:预读入缓输出机制的实现;httpd服务器;sshd服务器等。

创建守护进程模型

创建守护进程,最关键的一步是调用 setsid 函数创建一个新的 Session,并成为 Session Leader。

创建步骤：

创建子进程,父进程退出
所有工作在子进程中进行，形式上脱离了控制终端
在子进程中创建新会话
setsid()函数
使子进程完全独立出来,脱离控制
改变当前目录位置
chdir()函数
防止占用可卸载的文件系统
重设文件权限掩码
umask()函数
防止继承的文件创建屏蔽字拒绝某些权限
增加守护进程灵活性
关闭文件描述符
继承的打开文件不会用到,浪费系统资源,无法卸载
开始执行守护进程核心工作
守护进程退出处理程序模型

e.g. 简单实现

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/stat.h>
#include <fcntl.h>

/*
 * Report a failed call and terminate the process.
 *
 * str: prefix handed to perror(), which appends the text for the
 *      current errno and writes the line to stderr.
 *
 * Never returns: the process exits with status -1 (a waiting parent
 * observes it as 255).
 */
void say_error(const char* str)
{
	perror(str);
	exit(-1);
}

/*
 * Minimal daemon skeleton:
 *   1. fork and let the parent exit, so the child is not a process-group
 *      leader and setsid() is allowed to succeed;
 *   2. setsid() to become leader of a new session with no controlling
 *      terminal;
 *   3. change the working directory and reset the file-creation mask;
 *   4. point stdin/stdout/stderr at /dev/null;
 *   5. enter the (simulated) service loop.
 *
 * Every step that can fail is checked and reported through say_error().
 * The function does not return once the service loop is reached.
 */
int main(int argc, char* argv[])
{
	pid_t pid = fork();
	if (pid == -1) say_error("fork error");
	if (pid > 0) return 0;  // parent: its job is done

	if (setsid() == -1) say_error("setsid error");

	// stderr is still the inherited one here, so a failure is visible
	if (chdir("/var/www/html") == -1) say_error("chdir error");
	umask(0022);

	// The new session has no terminal, so fds 0/1/2 are redirected to
	// /dev/null explicitly instead of relying on open() returning fd 0.
	int fd = open("/dev/null", O_RDWR);
	if (fd == -1) say_error("open error");
	// stderr is redirected last so earlier failures can still be reported
	if (dup2(fd, STDIN_FILENO) == -1) say_error("dup2 error");
	if (dup2(fd, STDOUT_FILENO) == -1) say_error("dup2 error");
	if (dup2(fd, STDERR_FILENO) == -1) say_error("dup2 error");
	if (fd > STDERR_FILENO) close(fd);  // drop the extra descriptor

	// Simulated daemon work: sleep until a signal arrives rather than
	// spinning and burning a full CPU core.
	for (;;) pause();
}

进程控制

fork函数

函数原型

#include <unistd.h>
pid_t fork(void);

1
2
3

返回值
	成功：父子进程分别返回，在父进程中返回被创建子进程的pid，在子进程中返回0
	失败：-1，设置`errno`，此时没有子进程被创建

例子：

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/*
 * fork() demo: print a few steps, fork once, then let parent and child
 * each report the fork() return value they saw, their own PID and their
 * parent's PID. Both processes fall through to the final "end of file"
 * line, so it is printed twice.
 *
 * Returns 0 in both processes; exits with 1 if fork() fails.
 */
int main(int argc, char* argv[])
{
	printf("step 1....\n");
	printf("step 2....\n");
	printf("step 3....\n");
	printf("step 4....\n");

	printf("fork start....\n");
	// Flush before forking: when stdout is a pipe or a file it is fully
	// buffered, and the child would inherit (and later print) a copy of
	// everything still sitting in the buffer.
	fflush(stdout);

	pid_t pid = fork();
	if (pid == -1) {
		perror("fork error");
		exit(1);
	}

	if (pid == 0) {
		// child: fork() returned 0
		printf("----child to be created\n");
		printf("\t---- current pid = %d\n", pid);
		printf("\t---- my id is %d\n", getpid());
		printf("\t---- my parent is %d\n", getppid());
	} else {
		// parent: fork() returned the child's PID
		printf("---parent process: my child is %d\n", pid);
		printf("\t---- my id is %d\n", getpid());
		printf("\t---- my parent is %d\n", getppid());
	}

	//sleep(3);
	printf("============end of file\n");

	return 0;
}

运行结果：

循环创建子进程

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/*
 * Create five children from a single parent.
 *
 * Each child leaves the loop right after it is created, so it never
 * forks itself; the loop index it carries out is its ordinal (0-based).
 * The parent is the only process that completes all five iterations.
 *
 * Returns 0; exits with 1 if a fork() call fails.
 */
int main(int argc, char* argv[])
{
	int i = 0;
	int in_child = 0;

	while (i < 5 && !in_child) {
		switch (fork()) {
		case -1:
			perror("fork error");
			exit(1);
		case 0:
			in_child = 1;  // child: stop forking, keep i as its index
			break;
		default:
			i++;           // parent: go on to the next child
			break;
		}
	}

	if (!in_child) {
		sleep(1);  // give the children time to print first
		printf("I'm parent\n");
	} else {
		// No delay here: sleep() takes whole seconds (unsigned int), so a
		// fractional argument such as 0.1 is truncated to 0 and returns
		// immediately. Use nanosleep() if a sub-second pause is needed.
		printf("I'm %dth child\n", i+1);
	}
	return 0;
}

getpid与getppid函数

函数原型

1
2
3

#include <unistd.h>
pid_t getpid(void);
pid_t getppid(void);

返回值：该函数永远成功返回当前进程的PID或父进程的PID

getuid 与geteuid函数

获取当前进程实际用户ID

#include <unistd.h>
uid_t getuid(void);

获取当前进程有效用户ID

#include <unistd.h>
uid_t geteuid(void);

getgid 与getegid函数

获取当前进程实际用户组ID

#include <unistd.h>
gid_t getgid(void);

获取当前进程有效用户组ID

#include <unistd.h>
gid_t getegid(void);

getsid函数

获取进程的会话id

函数原型

#include <unistd.h>
pid_t getsid(pid_t pid);

成功返回指定进程的会话ID（pid为0时表示调用进程），失败返回-1，设置errno

setsid函数

创建一个会话，并以自己的ID设置进程组ID，同时也是新会话的ID

函数原型

#include <unistd.h>
pid_t setsid(void);

成功返回调用进程会话ID，失败返回-1，设置errno

e.g. 简单使用

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/wait.h>

/*
 * Print `str` plus the description of the current errno (via perror)
 * and terminate the process with exit status -1.
 * Does not return.
 */
void say_error(const char *str)
{
	perror(str);
	exit(-1);
}

/*
 * Dump the identity of the calling process to stdout, one line each:
 * PID, parent PID, process-group ID and session ID.
 *
 * name: label printed in square brackets at the start of every line.
 */
void print_session(const char *name)
{
	/* getpgid(0) / getsid(0): 0 selects the calling process. */
	printf("[%s] current process PID is %d \n"
	       "[%s] current process parent PID is %d \n"
	       "[%s] current process Group ID is %d \n"
	       "[%s] current process Session ID is %d \n",
	       name, getpid(),
	       name, getppid(),
	       name, getpgid(0),
	       name, getsid(0));
}

/*
 * setsid() demo: the child prints its process/group/session IDs, waits
 * three seconds, moves itself into a new session and prints the IDs
 * again; the parent prints its own IDs and then reaps the child.
 */
int main(int argc, char* argv[])
{
	pid_t pid = fork();

	switch (pid) {
	case -1:
		say_error("fork error");  /* does not return */
		break;

	case 0:  /* child */
		print_session("sub");

		sleep(3);
		/* Make this child's PID the ID of a brand-new session. */
		if (-1 == setsid())
			say_error("setsid error");
		/*
		 * From here on the process is independent and detached from
		 * the terminal, i.e. it can no longer interact with the user.
		 */

		printf("chage sid after.....\n");
		print_session("sub");
		break;

	default:  /* parent */
		print_session("parent");
		wait(NULL);  /* block until the child has been reaped */
		break;
	}

	return 0;
}

运行结果

[parent] current process PID is 7510
[parent] current process parent PID is 7314
[parent] current process Group ID is 7510
[parent] current process Session ID is 5913
[sub] current process PID is 7511
[sub] current process parent PID is 7510
[sub] current process Group ID is 7510
[sub] current process Session ID is 5913
chage sid after.....
[sub] current process PID is 7511
[sub] current process parent PID is 7510
[sub] current process Group ID is 7511
[sub] current process Session ID is 7511

setuid和seteuid函数

setuid（）函数用来设置调用进程的用户ID(real uid和effective uid)
seteuid函数用来设置调用进程的有效用户ID(effective uid)

函数原型

1
2
3

#include <unistd.h>
int setuid(uid_t uid);
int seteuid(uid_t euid);

参数解释

@param:
	uid: 要设置成的用户ID
	euid: 要设置成的有效用户ID

@return:
	successful: 0
	failure: -1, set errno

注意事项
setuid()的传入参数uid，根据进程具有的权限情况，可分为以下2种情况：

如果进程具有超级用户权限特权，那么就能设置任意的effective uid和real uid, 注意所有与进程有关的用户ID都被设置为uid(非0),在这种情况发生后，程序就不可能重新获得root权限.
无特权用户只能将real uid和effective uid同时设置成real uid或effective uid(e.g. 如果一个用户的real uid为1000，通过设置SUID的方式运行一个root用户的程序，即effective uid为0, 如下图，那么此时setuid()只能传入1000或0)。因此一个SUID程序希望暂时放弃root权限，以一个无权限用户的身份出现，然后重新获得root权限, 可以使用seteuid()来完成。

一般来说setuid函数用于降权使用的，一般要和suid（s权限）标志同时使用, 例如apache+php的web服务器 fork一个子进程，然后在用setuid来降低权限，来提高web服务器的安全性。
e.g. 简单的使用示例

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/wait.h>

/*
 * Fatal-error helper: emit "<str>: <errno text>" on stderr through
 * perror(), then leave the process with exit(-1).
 * Control never comes back to the caller.
 */
void say_error(const char *str)
{
	perror(str);
	exit(-1);
}

/*
 * Print the real and the effective user ID of the calling process,
 * each line prefixed with "[name]".
 *
 * uid_t is not guaranteed to be int (glibc defines it as an unsigned
 * type), so the values are cast to unsigned long and printed with %lu
 * to keep the format specifier and the argument type in agreement.
 */
void print_uid(const char *name){
	printf("[%s] current UID is %lu \n", name, (unsigned long)getuid());
	printf("[%s] current effective UID is %lu \n", name, (unsigned long)geteuid());
}

/*
 * seteuid() demo: the child prints its real/effective UIDs, lowers its
 * effective UID to its real UID, and prints them again; the parent
 * prints its own UIDs and reaps the child.
 *
 * Only the effective UID is changed by seteuid(); the real UID is left
 * untouched.
 */
int main(int argc, char* argv[])
{
	pid_t pid = fork();
	if(pid == -1) say_error("fork error");

	if(!pid){ // child
		print_uid("sub");
		// Drop privileges: effective UID := real UID.
		if(seteuid(getuid()) == -1) say_error("seteuid error");
		printf("chage uid after.....\n");
		print_uid("sub");

		// Work that does not need the elevated privileges goes here.

	}else{ // parent
		print_uid("parent");
		wait(NULL);  // wait for and reap the child
	}
	
	return 0;
}

编译并给程序赋予s权限

1	make && sudo chown 0:0 foo && sudo chmod u+s foo

运行结果

[parent] current UID is 1000
[parent] current effective UID is 0
[sub] current UID is 1000
[sub] current effective UID is 0
chage uid after.....
[sub] current UID is 1000
[sub] current effective UID is 1000

进程共享

父子进程相同之处：
- 刚fork后。代码段、data段、堆、栈、环境变量、全局变量、宿主目录位置、进程工作目录位置、信号处理方式
父子进程不同之处：
- 进程id、返回值、各自的父进程、进程创建时间、闹钟、未决信号集
对于全局变量，父子进程间遵循读时共享写时复制的原则
父子进程共享：
1. 文件描述符（打开文件的结构体）
2. mmap建立的映射区
fork之后，父进程先执行还是子进程先执行不确定。取决于内核所使用的调度算法

gdb调试

使用gdb调试时，gdb只能跟踪一个进程，默认跟踪父进程

可以在调用fork函数之前通过指令来设置gdb跟踪父进程或子进程

set follow-fork-mode child命令设置gdb在fork之后跟踪子进程
set follow-fork-mode parent设置跟踪父进程

注意一定要在fork函数调用之前才有效

exec函数族

当进程调用exec函数时，该进程的用户空间代码段.text与数据段.data完全被新程序替换，然后从新的.text第一条指令开始执行，但进程PID不变，换核不换壳

有六种以exec开头的函数，统称exec函数：

#include <unistd.h>

int execl(const char *pathname, const char *arg, ...
           /* (char  *) NULL */);

int execlp(const char *file, const char *arg, ...
           /* (char  *) NULL */);

int execle(const char *pathname, const char *arg, ...
           /*, (char *) NULL, char *const envp[] */);

int execv(const char *pathname, char *const argv[]);

int execvp(const char *file, char *const argv[]);

int execvpe(const char *file, char *const argv[], char *const envp[]);

execlp函数

加载一个进程，借助PATH环境变量

函数原型

#include <unistd.h>

int execlp(const char *file, const char *arg, ...
		   /* (char  *) NULL */);
// NULL 当成传参结束的哨兵
// 比如 execlp("ls", "ls", "-l", "-h", (char *)NULL);
// arg 从 argv[0]开始计算，所以要写两个 ls

参数：
- file：要加载程序的名字，该函数需要配合PATH环境变量来使用，当PATH中所有目录搜索后没有该参数的值则报错
返回：
- 成功：无返回
- 失败：-1
该函数通常调用系统程序如cp、ls、date、cat

例子：

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/*
 * execlp() demo: the child replaces itself with `ls -lh` (looked up
 * through PATH); the parent waits a second and prints its own PID.
 *
 * Returns 0 from the parent. Exits with 1 if fork() fails, or from the
 * child if execlp() fails.
 */
int main(int argc, char* argv[])
{
	pid_t pid = fork();

	if (pid == -1) {
		perror("fork error");
		exit(1);
	}

	if (pid == 0) {  // child
		// The terminating sentinel of a variadic exec call must be a
		// null *pointer*; a bare NULL may be a plain integer 0.
		execlp("ls", "ls", "-lh", (char *)NULL);
		// Reached only if exec failed; on success the new program is
		// already running in place of this code.
		perror("exec error");
		exit(1);
	}

	// parent
	sleep(1);
	printf("I'm parent %d\n", getpid());

	return 0;
}

execl函数

指定路径，加载一个进程

execl("./a.out", "a.out", NULL);
execl("/usr/bin/ls", "ls", "-lah", NULL);

execvp函数

函数原型

#include <unistd.h>
int execvp(const char *file, char *const argv[]);

用法：

char *ls_argv[] = {"ls", "-l", "-h", NULL};
execvp("ls", ls_argv);

exec 函数族一般规律

exec函数一旦调用成功即执行新的程序，不返回。只有失败才返回，错误值-1
所以通常直接在exec函数调用后直接调用perror()和exit()，无需if判断

函数名中的字母	描述
l(list)	命令行参数列表
p(path)	搜索file时使用PATH环境变量
v(vector)	使用命令行参数数组
e(environment)	使用环境变量数组，不使用进程原有的环境变量，设置新加载程序运行的环境变量

实际上，只有execve是真正的系统调用，其他exec函数最终都调用execve，所以execve在man手册的第二卷，其他函数在man手册的第三卷，这些函数的关系如下图所示：

回收子进程

孤儿进程

孤儿进程：父进程先于子进程结束，则子进程成为孤儿进程，

子进程的父进程成为init进程，称为init进程领养孤儿进程。

僵尸进程

僵尸进程：进程终止，父进程尚未回收，子进程残留资源(PCB) 存在于内核中，变成僵尸进程。

注意：僵尸进程是不能用kill命令清除掉的。因为kill命令只是用来终止进程的，而僵尸进程已经终止了，这种时候只能杀死父进程来终止僵尸进程

wait函数

父进程调用wait函数可以回收子进程终止信息，该函数有三个功能：

阻塞等待子进程退出
回收子进程残留资源
获取子进程结束状态（退出原因）

函数原型

#include <sys/types.h>
#include <sys/wait.h>

pid_t wait(int *wstatus);

参数
	 wstatus:可以用来传出进程的退出原因
返回值
	成功：清理掉子进程的pid
	失败：返回-1（没有子进程）

当进程终止时，操作系统的隐式回收机制会：

关闭所有文件描述符
释放用户空间分配的内存，内核中的PCB仍存在。其中保存该进程的退出状态（正常退出 -> 退出值; 异常退出 -> 终止信号）

可使用wait函数传出参数status来保存进程的退出状态，借助宏函数来进一步判断进程终止的具体原因，宏函数可分为三组：

WIFEXITED(status)：为非0 -> 进程正常结束；

WEXITSTATUS(status) ：如上宏为真，使用此宏获取进程退出状态(exit的参数)
WIFSIGNALED(status) 为非0 -> 进程异常终止

WTERMSIG(status) 如上宏为真，使用此宏取得使进程终止的那个信号的编号。
WIFSTOPPED(status) 为非0 -> 进程处于暂停状态

WSTOPSIG(status) 如上宏为真，使用此宏取得使进程暂停的那个信号的编号。
WIFCONTINUED(status) 为真进程暂停后已经继续运行

waitpid 函数

作用同wait，但可指定pid进程清理，可以不阻塞

函数原型

#include <sys/types.h>
#include <sys/wait.h>

pid_t waitpid(pid_t pid, int *wstatus, int options);

特殊参数：
	@pid：
		- 大于0： 回收指定pid 的子进程
		- 等于-1：回收任意子进程（相当于wait）
		- 等于 0：回收与调用进程同属一个进程组的任意子进程
		- 小于-1：回收指定进程组内的任意子进程
返回值：
	- 成功：返回清理掉的子进程ID
	- 失败：-1（无子进程）
	- 返回0：函数调用时参数`options`为`WNOHANG`，且子进程正在运行

waitpid(-1, &wstatus, 0)等价wait(&wstatus)

注意：一次wait或waitpid调用只能清理一个子进程，清理多个子进程应使用循环。

e.g.

#include <stdio.h>
#include <unistd.h>
#include <sys/wait.h>


/*
 * waitpid() demo: fork five children, then reap them from the parent
 * with non-blocking waitpid() calls.
 *
 * Child number i (0-based) sleeps i seconds, prints its ordinal and PID
 * and terminates with exit status 233. The parent polls once per second
 * until waitpid() reports that no children are left, printing for each
 * reported child why it changed state.
 *
 * Returns 0 from the parent, 233 from each child, 1 if fork() fails.
 */
int main(int argc, char* argv[])
{
	int i = 0;
	for(;i < 5; ++i){
		pid_t pid = fork();
		if (pid == -1) {
			perror("fork error");
			return 1;
		}
		if (pid == 0) break;  // child: leave the loop, do not fork again
	}

	if (5 != i) { // child
		sleep(i);
		printf("I'm %dth child, pid = %d\n", i+1, getpid());
		return 233;
	}

	// parent
	pid_t wpid;
	int wstatus = 0;
	// WNOHANG:   return 0 immediately while children are still running.
	// WUNTRACED: also report stopped children; without it the
	//            WIFSTOPPED branch below could never be taken.
	// The loop ends when waitpid() returns -1 (no children remain).
	while ((wpid = waitpid(-1, &wstatus, WNOHANG | WUNTRACED)) != -1) {
		if (wpid == 0) {
			sleep(1);  // nothing to reap yet; poll again later
			continue;
		}

		printf("catch the child %d \n", wpid);
		if (WIFEXITED(wstatus))
			printf("reason code: %d\n", WEXITSTATUS(wstatus));  // exit status
		else if (WIFSIGNALED(wstatus))
			printf("reason code: %d\n", WTERMSIG(wstatus));     // terminating signal
		else if (WIFSTOPPED(wstatus))
			printf("reason code: %d\n", WSTOPSIG(wstatus));     // stopping signal
	}

	return 0;
}

运行结果：

I'm 1th child, pid = 58902
I'm 2th child, pid = 58903
I'm 3th child, pid = 58904
I'm 4th child, pid = 58905
I'm 5th child, pid = 58906
catch the child 58902 
reason code: 233
catch the child 58903 
reason code: 233
catch the child 58904 
reason code: 233
catch the child 58905 
reason code: 233
catch the child 58906 
reason code: 233