

#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <unistd.h>
#include <stdlib.h>
#include <signal.h>
#include <pthread.h>

/*
 * Bookkeeping for the single worker child process.
 *
 * pids_mutex is taken around every read and write of pid.
 * pid is 0 while no child is recorded; otherwise it is the PID returned by
 * fork() in the parent.
 */
struct ProcessGuard {
    pthread_mutex_t pids_mutex;
    pid_t pid;
};

/* Shared guard instance; allocated and initialised in main(). */
struct ProcessGuard *g_guard;


/*
 * Body of the forked child process: prints its PID and thread id once per
 * second, forever. This function never returns.
 */
void chile_process() {
    while (1) {
        printf("This is the child process. My PID is %d.My thread_id is %lu.\n",
               (int)getpid(), (unsigned long)pthread_self());
        sleep(1);
    }
}

/*
 * Worker-thread routine of the parent process.
 *
 * Loops forever; whenever g_guard->pid is 0 it forks a child, records the
 * child's PID in g_guard->pid and reports it. All accesses to g_guard->pid
 * happen with g_guard->pids_mutex held.
 */
void create_process_routine() {
    printf("This is the child thread of parent process. My PID is %d.My thread_id is %lu.\n",
           (int)getpid(), (unsigned long)pthread_self());

    while (1) {
        pthread_mutex_lock(&g_guard->pids_mutex);

        if (g_guard->pid != 0) {
            /*
             * A child is already recorded. Release the mutex before retrying:
             * looping back with it still held would relock a mutex this
             * thread already owns and block forever.
             */
            pthread_mutex_unlock(&g_guard->pids_mutex);
            continue;
        }

        pid_t pid = fork();
        sleep(1);
        /* Runs in both parent and child, so this line is printed twice. */
        printf("Create child process %d.\n", (int)pid);

        if (pid < 0) {
            perror("fork failed");
        } else if (pid == 0) {
            /* Child process: chile_process() does not return. */
            chile_process();
            break;
        } else {
            /* Parent process: remember the child so no second one is forked. */
            g_guard->pid = pid;
            printf("dispatch task to process. pid is %d.\n", (int)pid);
        }

        pthread_mutex_unlock(&g_guard->pids_mutex);
    }
}


/*
 * Signal handler (installed for SIGCHLD in main()): reports the signal and
 * resets g_guard->pid to 0 under g_guard->pids_mutex.
 *
 * signum: number of the signal being handled.
 *
 * NOTE(review): printf() and pthread_mutex_lock() are not async-signal-safe.
 * If this handler interrupts a thread that already holds, or is blocked on,
 * pids_mutex, the lock below never succeeds -- this is the hang shown in the
 * gdb backtrace further down in this file. The calls are kept here because
 * that hang is what the example demonstrates.
 */
void sighandler(int signum) {
    printf("This is the parent process.Catch signal %d.My PID is %d.My thread_id is %lu.\n",
           signum, (int)getpid(), (unsigned long)pthread_self());

    pthread_mutex_lock(&g_guard->pids_mutex);
    g_guard->pid = 0;
    pthread_mutex_unlock(&g_guard->pids_mutex);
}


/*
 * pthread start routine with the signature pthread_create() requires;
 * forwards to create_process_routine(), which takes no arguments.
 */
static void *create_process_thread(void *arg) {
    (void)arg;
    create_process_routine();
    return NULL;
}

/*
 * Program entry point.
 *
 * Allocates and initialises the shared guard, installs sighandler for
 * SIGCHLD, starts the worker thread that forks child processes, then loops
 * forever printing a message once per second.
 *
 * Exits with status 1 if allocation, mutex initialisation or thread creation
 * fails.
 */
int main(void) {
    pthread_t creat_process_tid;

    g_guard = malloc(sizeof *g_guard);
    if (g_guard == NULL) {
        perror("malloc ProcessGuard failed");
        exit(1);
    }

    /* pthread functions return the error number instead of setting errno. */
    int rc = pthread_mutex_init(&g_guard->pids_mutex, NULL);
    if (rc != 0) {
        fprintf(stderr, "init pids_mutex error: %s\n", strerror(rc));
        exit(1);
    }
    g_guard->pid = 0;

    printf("This is the Main thread of parent process.PID is %d.My thread_id is %lu.\n",
           (int)getpid(), (unsigned long)pthread_self());

    signal(SIGCHLD, sighandler);

    rc = pthread_create(&creat_process_tid, NULL, create_process_thread, NULL);
    if (rc != 0) {
        fprintf(stderr, "pthread_create failed: %s\n", strerror(rc));
        exit(1);
    }

    while (1) {
        printf("Get task from network.\n");
        sleep(1);
    }

    /* Not reached: the loop above never terminates. */
    pthread_mutex_destroy(&g_guard->pids_mutex);
    return 0;
}



$ ./test
This is the Main thread of parent process.PID is 17641.My thread_id is 140014057678656.
Get task from network.
This is the child thread of parent process. My PID is 17641.My thread_id is 140014049122048.
Create child process 17643.
dispatch task to process. pid is 17643.
Create child process 0.
This is the child process. My PID is 17643.My thread_id is 140014049122048.
Get task from network.
This is the child process. My PID is 17643.My thread_id is 140014049122048.
This is the child process. My PID is 17643.My thread_id is 140014049122048.
Get task from network.
This is the child process. My PID is 17643.My thread_id is 140014049122048.
This is the child process. My PID is 17643.My thread_id is 140014049122048.
This is the child process. My PID is 17643.My thread_id is 140014049122048.
Get task from network.
This is the child process. My PID is 17643.My thread_id is 140014049122048.
Get task from network.
This is the child process. My PID is 17643.My thread_id is 140014049122048.
This is the child process. My PID is 17643.My thread_id is 140014049122048.
Get task from network.
This is the child process. My PID is 17643.My thread_id is 140014049122048.
This is the child process. My PID is 17643.My thread_id is 140014049122048.
This is the child process. My PID is 17643.My thread_id is 140014049122048.
Get task from network.
This is the child process. My PID is 17643.My thread_id is 140014049122048.
This is the child process. My PID is 17643.My thread_id is 140014049122048.
Get task from network.
This is the parent process.Catch signal 17.My PID is 17641.My thread_id is 140014049122048.
Get task from network.
Get task from network.
Get task from network.
Get task from network.
Get task from network.

这个结果和我们的代码设计不符合,也不太符合逻辑:父进程捕获到 SIGCHLD(signal 17)之后,并没有再创建新的子进程,只剩主线程在不断输出 "Get task from network."。于是我们使用 gdb attach 主进程。

Attaching to process 17641
[New LWP 17642]
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
0x00007f578fb7a9d0 in __GI___nanosleep (requested_time=requested_time@entry=0x7fffd2b41190, remaining=remaining@entry=0x7fffd2b41190) at ../sysdeps/unix/sysv/linux/nanosleep.c:28
28      ../sysdeps/unix/sysv/linux/nanosleep.c: No such file or directory.
(gdb) info threads
  Id   Target Id         Frame
* 1    Thread 0x7f57902be740 (LWP 17641) "test" 0x00007f578fb7a9d0 in __GI___nanosleep (requested_time=requested_time@entry=0x7fffd2b41190, remaining=remaining@entry=0x7fffd2b41190) at ../sysdeps/unix/sysv/linux/nanosleep.c:28
  2    Thread 0x7f578fa95700 (LWP 17642) "test" __lll_lock_wait () at ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:135
(gdb) t 2
[Switching to thread 2 (Thread 0x7f578fa95700 (LWP 17642))]
#0  __lll_lock_wait () at ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:135
135     ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S: No such file or directory.
(gdb) bt
#0  __lll_lock_wait () at ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:135
#1  0x00007f578fe91023 in __GI___pthread_mutex_lock (mutex=0x55c51383e260) at ../nptl/pthread_mutex_lock.c:78
#2  0x000055c512c29a9d in sighandler ()
#3  <signal handler called>
#4  __lll_lock_wait () at ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:133
#5  0x00007f578fe91023 in __GI___pthread_mutex_lock (mutex=0x55c51383e260) at ../nptl/pthread_mutex_lock.c:78
#6  0x000055c512c29b42 in create_process_routine ()
#7  0x00007f578fe8e6db in start_thread (arg=0x7f578fa95700) at pthread_create.c:463
#8  0x00007f578fbb788f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95



A process-directed signal may be delivered to any
one of the threads that does not currently have the signal blocked.
If more than one of the threads has the signal unblocked, then the
kernel chooses an arbitrary thread to which to deliver the signal.

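这句话是说,process-directed signal 会被投递到当前没有屏蔽(block)该 signal 的任意一个线程中,具体是哪个线程由系统内核决定。这就意味着我们的 sighandler 可能在主线程中执行,也可能在子线程中执行。从上面的 bt 可以看到,这一次 sighandler 恰好在子线程中执行,而子线程被打断时正停在 create_process_routine 的 pthread_mutex_lock 里;sighandler 又去锁同一个 pids_mutex,而被打断的线程要等 sighandler 返回之后才能继续执行,所以这把锁永远等不到。于是发生了我们上面的死锁现象。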



