Circular debugging using ptrace results in deadlock due to race condition?
Hi guys,
As part of my personal research I am facing a challenging problem.
I am trying to let two processes be each other's debuggers using the ptrace syscall. However, my proof-of-concept implementation always results in a deadlock state (both processes get stuck in 't+' state as shown by 'ps aux').
Here is my code; it's pretty simple:
/* C standard headers */
#include <errno.h>
#include <inttypes.h>
#include <setjmp.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Linux headers */
#include <dirent.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <sys/user.h>
/* Architecture-specific headers */
#include <asm/ptrace.h>
#include <asm/unistd.h>
#include <signal.h>
#include <assert.h>
typedef void fun_moved_from_context();
// using namespace std;
/*
 * Attach to process `pid` as its tracer (PTRACE_ATTACH) and log the result.
 *
 * pid: process to attach to.
 * id:  label of the calling process, printed as a prefix of the log line.
 *
 * When ptrace() reports failure (-1), the errno text is printed via perror().
 */
void attachTo(pid_t pid, char* id) {
  long ret = ptrace(PTRACE_ATTACH, pid, NULL, NULL);
  /* Capture errno right away: the printf below is allowed to overwrite it,
   * which would make perror() report the wrong reason. */
  int saved_errno = errno;

  printf("\t%s\tattachTo: %ld\n", id, ret);
  if (ret == -1) {
    errno = saved_errno;
    perror("err: ");
  }
}
/*
 * Attach to process `pid` without stopping it (PTRACE_SEIZE) and log the
 * result.
 *
 * pid: process to seize.
 * id:  label of the calling process, printed as a prefix of the log line.
 *
 * PTRACE_SEIZE returns 0 on success and -1 on failure, so success is
 * asserted as `ret == 0`. The result is logged (and the errno text printed
 * on failure) before the assertion so a failing run still shows why.
 */
void seizeTo(pid_t pid, char* id) {
  long ret = ptrace(PTRACE_SEIZE, pid, NULL, NULL);
  /* Capture errno before printf has a chance to overwrite it. */
  int saved_errno = errno;

  printf("\t%s\tseizeTo: %ld\n", id, ret);
  if (ret == -1) {
    errno = saved_errno;
    perror("err: ");
  }
  assert(ret == 0);
}
/*
 * Detach from tracee `pid` (PTRACE_DETACH) and print the raw ptrace() return
 * value, prefixed with the caller's label `id`.
 */
void detachFrom(pid_t pid, char* id) {
  const long result = ptrace(PTRACE_DETACH, pid, NULL, NULL);

  printf("\t%s\tdetachFrom: %ld\n", id, result);
}
/*
 * Enable clone/exec/exit/fork/vfork event reporting on tracee `pid`
 * (PTRACE_SETOPTIONS) and print the raw ptrace() return value, prefixed with
 * the caller's label `id`.
 */
void setOptions(pid_t pid, char* id) {
  /* Set of ptrace events the tracer asks to be notified about. */
  const long wanted = PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXEC |
                      PTRACE_O_TRACEEXIT | PTRACE_O_TRACEFORK |
                      PTRACE_O_TRACEVFORK;
  const long result = ptrace(PTRACE_SETOPTIONS, pid, NULL, (void*)wanted);

  printf("\t%s\tsetOptions: %ld\n", id, result);
}
/*
 * Write a value into the memory of tracee `pid` (PTRACE_POKEDATA) and print
 * the raw ptrace() return value, prefixed with the caller's label `id`.
 *
 * can_run: address, in the tracee's address space, to write to.
 * data:    the value to store, carried in a pointer-sized argument.
 */
void setVarData(pid_t pid, volatile bool* can_run, void* data, char* id) {
  void* const target = (void*)can_run;
  const long result = ptrace(PTRACE_POKEDATA, pid, target, (void*)data);

  printf("\t%s\tsetVarData: %ld\n", id, result);
}
/*
 * Resume stopped tracee `pid` without delivering a signal (PTRACE_CONT with
 * a NULL data argument) and print the raw ptrace() return value, prefixed
 * with the caller's label `id`.
 */
void cont(pid_t pid, char* id) {
  const long result = ptrace(PTRACE_CONT, pid, NULL, NULL);

  printf("\t%s\tcont: %ld\n", id, result);
}
/*
 * Issue PTRACE_INTERRUPT for tracee `pid` and print the raw ptrace() return
 * value, prefixed with the caller's label `id`.
 */
void interrupt(pid_t pid, char* id) {
  const long result = ptrace(PTRACE_INTERRUPT, pid, NULL, NULL);

  printf("\t%s\tinterrupt: %ld\n", id, result);
}
/*
 * Minimal tracer loop: wait for any child/tracee to change state and react
 * to ptrace stops until wait() fails or a tracee reports its exit event.
 *
 * id: numeric label of the calling process; it prefixes every log line.
 *
 * Handling per stop:
 *   - SIGTRAP + PTRACE_EVENT_FORK: log and resume the tracee.
 *   - SIGTRAP + PTRACE_EVENT_EXIT: log and return.
 *   - any other SIGTRAP: advance the tracee's PC (uregs[15]) by 2 and
 *     resume it. NOTE(review): 2 is presumably the size of the trapping
 *     (Thumb) breakpoint instruction; confirm for ARM-mode code.
 *   - any other stop signal: resume the tracee, since a ptrace-stopped
 *     tracee stays stopped until its tracer restarts it.
 */
void debug(int id) {
  /* The logging helpers and format strings of this listing take the label
   * as a string, so render the numeric id once and reuse it. */
  char label[16];
  snprintf(label, sizeof label, "%d", id);

  while (true) {
    int status;

    printf("\t%s\twhile\n", label);
    sleep(1);

    pid_t recv = wait(&status);
    if (recv == -1) {
      printf("\t%s\tDebugger exiting\n", label);
      return;
    }
    if (!WIFSTOPPED(status)) {
      continue;
    }

    int stop_signal = WSTOPSIG(status);
    if (stop_signal != SIGTRAP) {
      cont(recv, label);
      continue;
    }

    /* For ptrace event stops, status >> 8 is SIGTRAP | (event << 8);
     * removing the SIGTRAP bits leaves (event << 8). */
    int event_code = (status >> 8) ^ SIGTRAP;
    switch (event_code) {
      case PTRACE_EVENT_FORK << 8:
        printf("\t%s\tFORK EVENT.\n", label);
        cont(recv, label);
        break;
      case PTRACE_EVENT_EXIT << 8:
        printf("\t%s\t%li exited.\n", label, (long)recv);
        return;
      default: {
        struct user_regs regs;

        printf("\t%s\trecv: %i ; status: %i\n", label, (int)recv, status);
        if (ptrace(PTRACE_GETREGS, recv, NULL, &regs) == -1) {
          /* Never write back registers that were not actually read. */
          perror("PTRACE_GETREGS");
          cont(recv, label);
          break;
        }
        regs.uregs[15] += 2;  // addr_size;
        printf("\t%s\tp: new PC: %lx\n", label, regs.uregs[15]);
        if (ptrace(PTRACE_SETREGS, recv, NULL, &regs) == -1) {
          perror("PTRACE_SETREGS");
        }
        cont(recv, label);
        break;
      }
    }
  }
}
/*
 * Proof of concept: after fork(), the parent (A) and the child (B) each try
 * to attach to the other with ptrace.
 *
 * Handshake:
 *   1. B attaches to A, pokes can_runA = 1 and its own pid into A's copy of
 *      procB, then resumes A and spins on can_runB.
 *   2. A, once can_runA is set, attaches to B, pokes can_runB = 1 and
 *      resumes B.
 *
 * This version only makes the handshake well-defined (word-sized poke
 * targets, explicit casts, fork() error check); the order of the ptrace
 * calls between the two processes is unchanged.
 */
int main() {
  /* PTRACE_POKEDATA stores a whole machine word, so every variable written
   * by the peer is a long; poking a 1-byte bool would overwrite whatever
   * happens to sit next to it on the stack. */
  volatile long can_runA = 0, can_runB = 0;
  volatile long procB = 0;
  pid_t procA = getpid();

  pid_t child = fork();
  if (child == -1) {
    /* Without this check a failed fork() would fall into the "B" branch and
     * the only process would try to attach to itself. */
    perror("fork");
    return 1;
  }

  if (child > 0) {  // process A
    while (!can_runA) {
      printf("\tA\twaiting to continue...\n");
      sleep(1);
    }
    pid_t peer = (pid_t)procB; /* written into our memory by B */
    attachTo(peer, "A");
    waitpid(peer, NULL, __WALL);
    setOptions(peer, "A");
    /* setVarData() declares its address parameter as volatile bool*, hence
     * the casts; the address is only ever handed to ptrace(). */
    setVarData(peer, (volatile bool*)&can_runB, (void*)1L, "A");
    cont(peer, "A");
    printf("\tA\tfinished\n");
  } else {  // process B
    procB = getpid();
    attachTo(procA, "B");
    waitpid(procA, NULL, __WALL);
    setOptions(procA, "B");
    setVarData(procA, (volatile bool*)&can_runA, (void*)1L, "B");
    setVarData(procA, (volatile bool*)&procB, (void*)procB, "B");
    cont(procA, "B");
    while (!can_runB) {
      printf("\tB\twaiting to continue...\n");
      sleep(1);
    }
    printf("\tB\tfinished\n");
  }
  return 0;
}
I have compiled and run this on an ARMv7 developer board with kernel version 3.0.35 (Linaro 13.08).
The output of the above code is this:
A waiting to continue...
B attachTo: 0
B setOptions: 0
B setVarData: 0
B setVarData: 0
B cont: 0
B waiting to continue...
B waiting to continue...
A attachTo: 0
As you can see, neither process ever reaches its "finished" printf: everything gets stuck as soon as the second process (A) attaches to the process that is already its debugger (B).
I have done a similar experiment for 3 processes, such that each one attempts to attach to the other in a circular fashion: A -> B -> C -> A
The result in this case was exactly the same. However, here I was able to detect a race condition, because sometimes the code executed properly without getting stuck in a deadlock (but it's hard to reproduce).
If you wish, you can test this by using a lightweight debugger I've developed and three console terminals. Here's the code:
/* C standard headers */
#include <errno.h>
#include <inttypes.h>
#include <setjmp.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Linux headers */
#include <dirent.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <sys/user.h>
/* Architecture-specific headers */
#include <asm/ptrace.h>
#include <asm/unistd.h>
#include <signal.h>
#include <assert.h>
typedef void fun_moved_from_context();
//using namespace std;
/*
 * Attach to process `pid` as its tracer (PTRACE_ATTACH) and print the raw
 * ptrace() return value, prefixed with the caller's numeric label `id`.
 */
void attachTo(pid_t pid, int id) {
    const long result = ptrace(PTRACE_ATTACH, pid, NULL, NULL);

    printf("%i attachTo: %ld\n", id, result);
}
/*
 * Issue PTRACE_SEIZE for process `pid` and print the raw ptrace() return
 * value, prefixed with the caller's numeric label `id`. The result is only
 * logged, never checked.
 */
void seizeTo(pid_t pid, int id) {
    const long result = ptrace(PTRACE_SEIZE, pid, NULL, NULL);

    printf("%i seizeTo: %ld\n", id, result);
}
/*
 * Detach from tracee `pid` (PTRACE_DETACH) and print the raw ptrace() return
 * value, prefixed with the caller's numeric label `id`.
 */
void detachFrom(pid_t pid, int id) {
    const long result = ptrace(PTRACE_DETACH, pid, NULL, NULL);

    printf("%i detachFrom: %ld\n", id, result);
}
/*
 * Enable clone/exec/exit/fork/vfork event reporting on tracee `pid`
 * (PTRACE_SETOPTIONS) and print the raw ptrace() return value, prefixed with
 * the caller's numeric label `id`.
 */
void setOptions(pid_t pid, int id) {
    /* Set of ptrace events the tracer asks to be notified about. */
    const long wanted = PTRACE_O_TRACECLONE
                      | PTRACE_O_TRACEEXEC
                      | PTRACE_O_TRACEEXIT
                      | PTRACE_O_TRACEFORK
                      | PTRACE_O_TRACEVFORK;
    const long result = ptrace(PTRACE_SETOPTIONS, pid, NULL, (void*) wanted);

    printf("%i setOptions: %ld\n", id, result);
}
/*
 * Write a value into the memory of tracee `pid` (PTRACE_POKEDATA) and print
 * the raw ptrace() return value, prefixed with the caller's numeric label
 * `id`.
 *
 * can_run: address, in the tracee's address space, to write to.
 * data:    the value to store, carried in a pointer-sized argument.
 */
void setVarData(pid_t pid, volatile bool* can_run, void* data, int id) {
    void* const target = (void*) can_run;
    const long result = ptrace(PTRACE_POKEDATA, pid, target, (void*) data);

    printf("%i setVarData: %ld\n", id, result);
}
/*
 * Resume stopped tracee `pid` without delivering a signal (PTRACE_CONT with
 * a NULL data argument) and print the raw ptrace() return value, prefixed
 * with the caller's numeric label `id`.
 */
void cont(pid_t pid, int id) {
    const long result = ptrace(PTRACE_CONT, pid, NULL, NULL);

    printf("%i cont: %ld\n", id, result);
}
/*
 * Issue PTRACE_INTERRUPT for tracee `pid` and print the raw ptrace() return
 * value, prefixed with the caller's numeric label `id`.
 */
void interrupt(pid_t pid, int id) {
    const long result = ptrace(PTRACE_INTERRUPT, pid, NULL, NULL);

    printf("%i interrupt: %ld\n", id, result);
}
/*
 * Minimal tracer loop: wait for any child/tracee to change state and react
 * to ptrace stops until wait() fails or a tracee reports its exit event.
 *
 * id: numeric label of the calling process; it prefixes every log line.
 *
 * Handling per stop:
 *   - SIGTRAP + PTRACE_EVENT_FORK: log and resume the tracee.
 *   - SIGTRAP + PTRACE_EVENT_EXIT: log and return.
 *   - any other SIGTRAP: advance the tracee's PC (uregs[15]) by 2 and
 *     resume it. NOTE(review): 2 is presumably the size of the trapping
 *     (Thumb) breakpoint instruction; confirm for ARM-mode code.
 *   - any other stop signal: resume the tracee.
 */
void debug(int id) {
    while (true) {
        int status;

        printf("%i while\n", id);
        sleep(1);

        pid_t recv = wait(&status);
        if (recv == -1) {
            printf("%i Debugger exiting\n", id);
            return;
        }
        if (!WIFSTOPPED(status)) {
            continue;
        }

        int stop_signal = WSTOPSIG(status);
        printf("%i signal: %i\n", id, stop_signal);
        if (stop_signal != SIGTRAP) {
            cont(recv, id);
            continue;
        }

        /* For ptrace event stops, status >> 8 is SIGTRAP | (event << 8);
         * removing the SIGTRAP bits leaves (event << 8). */
        int event_code = (status >> 8) ^ SIGTRAP;
        printf("%i event_code: %i\n", id, event_code);
        switch (event_code) {
            case PTRACE_EVENT_FORK << 8:
                printf("%i FORK EVENT.\n", id);
                cont(recv, id);
                break;
            case PTRACE_EVENT_EXIT << 8:
                printf("%i %li exited.\n", id, (long) recv);
                return;
            default: {
                struct user_regs regs;

                printf("%i recv: %i ; status: %i\n", id, (int) recv, status);
                if (ptrace(PTRACE_GETREGS, recv, NULL, &regs) == -1) {
                    /* Never write back registers that were not read. */
                    perror("PTRACE_GETREGS");
                    cont(recv, id);
                    break;
                }
                regs.uregs[15] += 2; //addr_size;
                printf("%i p: new PC: %lx\n", id, regs.uregs[15]);
                if (ptrace(PTRACE_SETREGS, recv, NULL, &regs) == -1) {
                    perror("PTRACE_SETREGS");
                }
                cont(recv, id);
                break;
            }
        }
    }
}
/*
 * Interactive entry point of the lightweight debugger.
 *
 * Prints this process's pid, then reads a target pid from stdin:
 *   - 0: execute a `bkpt` instruction so that a process tracing this one
 *     has a breakpoint trap to handle;
 *   - anything else: attach to that pid, wait for it to stop, enable the
 *     ptrace event options, resume it and enter the debug() loop.
 *
 * Returns 0 normally, 1 when no pid could be read or waiting for the
 * attached process failed.
 */
int main() {
    int pid;
    int me = getpid();

    printf("Hello, I am %d\n", me);
    printf("pid:");
    /* The prompt has no newline; flush so it is visible before blocking. */
    fflush(stdout);
    if (scanf("%d", &pid) != 1) {
        /* Without this check `pid` would be used uninitialized. */
        fprintf(stderr, "expected a numeric pid\n");
        return 1;
    }

    if (pid == 0) {
        printf("bkpt asm\n");
        asm("bkpt");
    } else {
        attachTo(pid, me);
        printf("start waitpid\n");
        if (waitpid(pid, NULL, __WALL) == -1) {
            perror("waitpid");
            return 1;
        }
        printf("end waitpid\n");
        setOptions(pid, me);
        cont(pid, me);
        debug(me);
    }
    return 0;
}
Once you've compiled the above code, you simply run the binary on each console and enter the PID of another process to establish a 3-way circle.
I am far from an expert on the kernel, but I did have a look at the ARM specific kernel implementation which left me puzzled. I couldn't find where/how/why this code does not work.
Now I'm wondering if it's possible at all to make this work without a deadlock occurring? Does anyone have any experience with this, or can provide some clues/feedback?
Thank you greatly for your time, attention and effort!
Ilya
Categories
- All Categories
- 145 LFX Mentorship
- 145 LFX Mentorship: Linux Kernel
- 819 Linux Foundation IT Professional Programs
- 371 Cloud Engineer IT Professional Program
- 183 Advanced Cloud Engineer IT Professional Program
- 83 DevOps Engineer IT Professional Program
- 151 Cloud Native Developer IT Professional Program
- 143 Express Training Courses & Microlearning
- 143 Express Courses - Discussion Forum
- Microlearning - Discussion Forum
- 6.7K Training Courses
- 48 LFC110 Class Forum - Discontinued
- 73 LFC131 Class Forum
- 49 LFD102 Class Forum
- 238 LFD103 Class Forum
- 22 LFD110 Class Forum
- 46 LFD121 Class Forum
- 1 LFD123 Class Forum
- LFD125 Class Forum
- 18 LFD133 Class Forum
- 9 LFD134 Class Forum
- 18 LFD137 Class Forum
- 72 LFD201 Class Forum
- 5 LFD210 Class Forum
- 5 LFD210-CN Class Forum
- 2 LFD213 Class Forum - Discontinued
- 128 LFD232 Class Forum - Discontinued
- 2 LFD233 Class Forum
- 4 LFD237 Class Forum
- 24 LFD254 Class Forum
- 726 LFD259 Class Forum
- 111 LFD272 Class Forum - Discontinued
- 4 LFD272-JP クラス フォーラム
- 13 LFD273 Class Forum
- 260 LFS101 Class Forum
- 2 LFS111 Class Forum
- 3 LFS112 Class Forum
- 3 LFS116 Class Forum
- 7 LFS118 Class Forum
- 1 LFS120 Class Forum
- 9 LFS142 Class Forum
- 8 LFS144 Class Forum
- 4 LFS145 Class Forum
- 4 LFS146 Class Forum
- 16 LFS148 Class Forum
- 15 LFS151 Class Forum
- 5 LFS157 Class Forum
- 72 LFS158 Class Forum
- LFS158-JP クラス フォーラム
- 12 LFS162 Class Forum
- 2 LFS166 Class Forum
- 7 LFS167 Class Forum
- 3 LFS170 Class Forum
- 2 LFS171 Class Forum
- 3 LFS178 Class Forum
- 3 LFS180 Class Forum
- 2 LFS182 Class Forum
- 5 LFS183 Class Forum
- 34 LFS200 Class Forum
- 737 LFS201 Class Forum - Discontinued
- 3 LFS201-JP クラス フォーラム - Discontinued
- 21 LFS203 Class Forum
- 135 LFS207 Class Forum
- 2 LFS207-DE-Klassenforum
- 2 LFS207-JP クラス フォーラム
- 302 LFS211 Class Forum
- 56 LFS216 Class Forum
- 55 LFS241 Class Forum
- 50 LFS242 Class Forum
- 38 LFS243 Class Forum
- 16 LFS244 Class Forum
- 6 LFS245 Class Forum
- LFS246 Class Forum
- LFS248 Class Forum
- 113 LFS250 Class Forum
- 2 LFS250-JP クラス フォーラム
- 1 LFS251 Class Forum
- 158 LFS253 Class Forum
- 1 LFS254 Class Forum
- 2 LFS255 Class Forum
- 13 LFS256 Class Forum
- 1 LFS257 Class Forum
- 1.3K LFS258 Class Forum
- 11 LFS258-JP クラス フォーラム
- 136 LFS260 Class Forum
- 162 LFS261 Class Forum
- 43 LFS262 Class Forum
- 82 LFS263 Class Forum - Discontinued
- 15 LFS264 Class Forum - Discontinued
- 11 LFS266 Class Forum - Discontinued
- 24 LFS267 Class Forum
- 25 LFS268 Class Forum
- 37 LFS269 Class Forum
- 7 LFS270 Class Forum
- 202 LFS272 Class Forum - Discontinued
- 2 LFS272-JP クラス フォーラム
- 4 LFS147 Class Forum
- 2 LFS274 Class Forum
- 4 LFS281 Class Forum
- 18 LFW111 Class Forum
- 262 LFW211 Class Forum
- 186 LFW212 Class Forum
- 15 SKF100 Class Forum
- 1 SKF200 Class Forum
- 2 SKF201 Class Forum
- 797 Hardware
- 199 Drivers
- 68 I/O Devices
- 37 Monitors
- 104 Multimedia
- 174 Networking
- 91 Printers & Scanners
- 85 Storage
- 762 Linux Distributions
- 82 Debian
- 67 Fedora
- 18 Linux Mint
- 13 Mageia
- 23 openSUSE
- 149 Red Hat Enterprise
- 31 Slackware
- 13 SUSE Enterprise
- 355 Ubuntu
- 470 Linux System Administration
- 39 Cloud Computing
- 71 Command Line/Scripting
- Github systems admin projects
- 95 Linux Security
- 78 Network Management
- 102 System Management
- 47 Web Management
- 70 Mobile Computing
- 19 Android
- 38 Development
- 1.2K New to Linux
- 1K Getting Started with Linux
- 380 Off Topic
- 116 Introductions
- 177 Small Talk
- 26 Study Material
- 810 Programming and Development
- 305 Kernel Development
- 487 Software Development
- 1.8K Software
- 263 Applications
- 183 Command Line
- 3 Compiling/Installing
- 988 Games
- 317 Installation
- 104 All In Program
- 104 All In Forum
Upcoming Training
-
August 20, 2018
Kubernetes Administration (LFS458)
-
August 20, 2018
Linux System Administration (LFS301)
-
August 27, 2018
Open Source Virtualization (LFS462)
-
August 27, 2018
Linux Kernel Debugging and Security (LFD440)