跳轉到內容

Linux 應用程式除錯技術/資源洩漏

來自華夏公益教科書,開放的書籍,為開放的世界

殭屍執行緒

[編輯 | 編輯原始碼]

任何已終止但未被加入或分離的執行緒都會洩漏作業系統資源,直到程序終止。不幸的是,無論是/proc還是gdb都不會顯示這些殭屍執行緒,至少在某些核心上不會。

一種找出它們的方法是使用 gdb 的使用者自訂命令:

#
# trace_call <location>
#
# User-defined gdb command: sets a breakpoint at the location given as the
# first argument ($arg0) and attaches a command list to it that prints a
# full backtrace (including locals) and then resumes the program.
#
define trace_call
    b $arg0
    commands
    bt full
    continue
    end
end
document trace_call
Trace specified call with call stack to screen. Example:
    set breakpoint pending on
    set pagination off
    set logging on
    trace_call __pthread_create_2_1
end
Using host libthread_db library "/lib/i686/cmov/libthread_db.so.1".
(gdb) trace_call __pthread_create_2_1
Function "__pthread_create_2_1" not defined.
Breakpoint 1 (__pthread_create_2_1) pending.
(gdb) trace_call __pthread_create_2_0
Function "__pthread_create_2_0" not defined.
Breakpoint 2 (__pthread_create_2_0) pending.
(gdb) r
Starting program: /home/amelinte/projects/articole/wikibooks/debug/plock foo bar bax
[Thread debugging using libthread_db enabled]
Breakpoint 3 at 0xb7f9b746
Pending breakpoint "__pthread_create_2_1" resolved
Breakpoint 4 at 0xb7f9c395
Pending breakpoint "__pthread_create_2_0" resolved
[New Thread 0xb7e48ad0 (LWP 8635)]
[Switching to Thread 0xb7e48ad0 (LWP 8635)]

Breakpoint 3, 0xb7f9b746 in pthread_create@@GLIBC_2.1 () from /lib/i686/cmov/libpthread.so.0
#0  0xb7f9b746 in pthread_create@@GLIBC_2.1 () from /lib/i686/cmov/libpthread.so.0
No symbol table info available.
#1  0x08048a7f in main (argc=4, argv=0xbfceb714) at plock.c:97
        s = 0
        tnum = 0
        opt = -1
        num_threads = 3
        tinfo = (struct thread_info *) 0x833b008
        attr = {__size = '\0' <repeats 13 times>, "\020", '\0' <repeats 21 times>, __align = 0}
        stack_size = -1
        res = (void *) 0x0
[New Thread 0xb7e47b90 (LWP 8638)]
Thread 1: top of stack near 0xb7e473c8; argv_string=foo

另一種方法是(同樣)使用一個中間層(interposition)庫:

/*
 *  Hook library. Usage: 
 *    gcc -c -g -Wall -fPIC libhook.c -o libhook.o 
 *    ld -o libhook.so libhook.o -shared -ldl
 *    LD_PRELOAD=./libhook.so program arguments
 * 
 *  Copyright 2012 Aurelian Melinte. 
 *  Released under GPL 3.0 or later. 
 */

#define _GNU_SOURCE
#include <dlfcn.h>

#include <signal.h>
#include <execinfo.h>

#include <errno.h>
#include <stdlib.h>
#include <stdio.h>  /*printf*/
#include <unistd.h>

#include <pthread.h>

#include <assert.h>



/* Pointer to a function taking a mutex and returning int.
 * NOTE(review): not referenced anywhere in the visible code. */
typedef int (*lp_pthread_mutex_func)(pthread_mutex_t *mutex);
/* Pointer type with the same signature as pthread_create(). */
typedef int (*pthread_create_func)(pthread_t *thread, 
                                   const pthread_attr_t *attr,
								   void *(*start_routine) (void *), void *arg);
/* Next pthread_create definition, filled in by hook_one() via dlsym();
 * NULL until the lookup has been performed. */
static pthread_create_func  _pthread_create_hook = NULL;


/*
 * Look up the next definition of `fname` (dlsym with RTLD_NEXT) and store it
 * in *fptr. Does nothing if *fptr is already non-NULL.
 *
 * fptr   in/out: receives the resolved function pointer; must not be NULL.
 * fname  name of the symbol to resolve; must not be NULL.
 *
 * Returns 0 on success (or if *fptr was already set), -1 if the lookup
 * failed or yielded a NULL address.
 */
static int
hook_one(pthread_create_func *fptr, const char *fname)
{
    const char *msg = NULL;

    assert(fptr != NULL);
    assert(fname != NULL);

    if (*fptr != NULL) {
        return 0;   /* already resolved */
    }

    printf("dlsym : wrapping %s\n", fname);

    /* Clear any stale error first: dlerror() reports the most recent error
     * since its last call, so an old failure would otherwise be mistaken
     * for a failure of this lookup. */
    (void) dlerror();
    *fptr = (pthread_create_func) dlsym(RTLD_NEXT, fname);
    /* Fetch the error state unconditionally so that msg is always valid
     * when it is printed below. */
    msg = dlerror();

    printf("next_%s = %p\n", fname, (void *) *fptr);

    if (*fptr == NULL || msg != NULL) {
        /* Never hand a NULL pointer to %s. */
        printf("dlsym %s failed : %s\n", fname,
               (msg != NULL) ? msg : "symbol resolved to NULL");
        return -1;
    }

    printf("dlsym: wrapping %s done\n", fname);
    return 0;
}


/*
 * Make sure _pthread_create_hook points at the next pthread_create
 * definition. Prints a message and terminates the process with
 * EXIT_FAILURE if the lookup does not succeed.
 */
static void
hook_funcs(void)
{
    int status;

    /* Already resolved on an earlier call: nothing to do. */
    if (_pthread_create_hook != NULL) {
        return;
    }

    status = hook_one(&_pthread_create_hook, "pthread_create");
    if (status == 0 && _pthread_create_hook != NULL) {
        return;
    }

    printf("Failed to hook.\n");
    exit(EXIT_FAILURE);
}


/*
 *
 */
 

int 
pthread_create(pthread_t *thread, 
               const pthread_attr_t *attr,
               void *(*start_routine) (void *), void *arg)
{
#define SIZE 40
    void *buffer[SIZE] = {0};
	int nptrs = 0;

    int rc = EINVAL; 
	
	rc = _pthread_create_hook(thread, attr, start_routine, arg);

    printf("*** pthread_create:\n");
    nptrs = backtrace(buffer, SIZE);
    backtrace_symbols_fd(buffer, nptrs, STDOUT_FILENO);

    return rc; 
}

/*
 *
 */
 
/*
 * Library constructor (GCC constructor attribute): announces itself on
 * standard output and resolves the hooked functions.
 */
__attribute__((constructor))
void _init(void)
{
    fputs("*** _init().\n", stdout);
    hook_funcs();
}


/*
 * Library destructor (GCC destructor attribute): only announces itself on
 * standard output.
 */
__attribute__((destructor))
void _fini(void)
{
    fputs("*** _fini().\n", stdout);
}

輸出有點粗糙,但可以將 backtrace_symbols_fd() 替換為適當的程式碼,把輸出細化到檔案和行號:

*** pthread_create:
./libhook.so(pthread_create+0x8c)[0x400215d3]
./plock[0x8048a7f]
/lib/i686/cmov/libc.so.6(__libc_start_main+0xe0)[0x4006f450]
./plock[0x8048791]

檔案描述符

[編輯 | 編輯原始碼]

由於幾乎所有東西都是檔案(資料夾、套接字、管道等),因此幾乎所有東西都可能導致需要關閉的檔案描述符。/proc可以幫助

# tree /proc/26041
/proc/26041
...
|-- fd                  # Open files descriptors
|   |-- 0 -> /dev/pts/21
|   |-- 1 -> /dev/pts/21
|   |-- 2 -> /dev/pts/21
|   `-- 3 -> socket:[113497835]
|-- fdinfo
|   |-- 0
|   |-- 1
|   |-- 2
|   `-- 3
...

gdb 的 trace_call 命令可以幫助檢視呼叫堆疊。

如果機器上沒有 gdb,則可以構建一個掛鉤 open()、pipe()、socket() 等函式的中間層庫。

其他可用的工具

  • lsof
  • fuser

哪個程序正在使用某個埠?以 root 身分執行:

# netstat -tlnp
Active Internet connections (only servers)
Proto Recv-Q Send-Q Local Address               Foreign Address             State       PID/Program name   
tcp        0      0 0.0.0.0:36510               0.0.0.0:*                   LISTEN      -                   
tcp        0      0 127.0.0.1:2207              0.0.0.0:*                   LISTEN      3438/python         
...
# lsof
COMMAND     PID             USER   FD      TYPE             DEVICE       SIZE       NODE NAME
init          1             root  cwd       DIR              253,0       4096          2 /
...
python     3438             root    4u     IPv4              11416                   TCP localhost.localdomain:2207 (LISTEN)

# lsof -i :2207
COMMAND  PID USER   FD   TYPE DEVICE SIZE NODE NAME
python  3438 root    4u  IPv4  11416       TCP localhost.localdomain:2207 (LISTEN)

其他工具

  • fuser

用於訊號量、共享記憶體和訊息佇列。

  • ipcs
  • ipcrm
# ipcs -spt
------ Semaphore Operation/Change Times --------
semid    owner      last-op                    last-changed              
187826177 aurelian_m  Fri Feb 10 09:37:26 2012   Fri Feb 10 09:33:39 2012  
187858946 aurelian_m  Fri Feb 10 09:52:11 2012   Fri Feb 10 09:50:44 2012

DIY:一個中間層資源計數器

[編輯 | 編輯原始碼]

libmemleak 可以輕鬆修改,用來跟蹤任何資源的洩漏:只需掛鉤正確的 API(例如 open()/close())。

華夏公益教科書