2017-10-21 73 views
1

這應該在循環(服務器)中工作,並將工作/查詢委託給由longrun()函數調用表示的錯誤庫到超時tmax = 3s的線程。我放置了同步變量,我試圖等待不超過這個限制,但是當longrun()掛起(運行4)時,它仍然等待全部時間(7s)而不是所請求的限制。誰能解釋一下?爲什麼pthread_cond_timedwait在指定的時間限制之後不會觸發?

#include <unistd.h> 
#include <errno.h> 
#include <stdio.h> 
#include <stdlib.h> 
#include <string.h> 
#include <time.h> 
#include <pthread.h> 
#include <sys/time.h> 

#include <iostream> 
using namespace std; 


string int2str(int i){ 
    char buf[10]; // no larger int passed we hope 
    int end = sprintf(buf, "%d", i); 
    buf[end] = '\0'; 
    return string(buf); 
} 
string longrun(int qi){ 
    if(qi % 4 == 0) { 
     sleep(7); 
     return string("'---- to: ") + int2str(qi) + string("' (hang case)"); 
    } 
    else { 
     sleep(1); 
     return string("'okay to: ") + int2str(qi) + string("'"); 
    } 
} 


struct tpack_t {   // thread pack 
    pthread_t thread; 
    pthread_mutex_t mutex; 
    pthread_cond_t go;  // have a new value to run 
    pthread_cond_t ready; // tell main thread we're done processing 
    int newq;    // predicate on go+ready condition for wait 
    int qi;     // place question as int to thread: question-int 
    string res;    // where i place the response 
    tpack_t(); 
}; 
tpack_t::tpack_t() { 
    pthread_mutex_init (&mutex, NULL); 
    pthread_cond_init (&go, NULL); 
    pthread_cond_init (&ready, NULL); 
    newq = 0; 
} 
void set_cond_time(timespec* ctp, int tmax){ 
    timeval now; 
    gettimeofday(&now, NULL); 
    ctp->tv_nsec = now.tv_usec * 1000UL; 
    ctp->tv_sec = now.tv_sec + tmax; // now + max time! 
    printf("[m] ... set to sleep for %d sec, i hope...\n", tmax); 
} 

void take_faulty_evasive_action(tpack_t* tpx){ 
    // basically kill thread, clean faulty library copy (that file) and restart it 
    cout << "will work on it (restarting thread) soon!\n"; 
    tpx->newq = 0; // minimal action for now... 
} 

void* faulty_proc(void* arg){ 
    tpack_t* tpx = (tpack_t*) arg; 
    while(true){ 
     pthread_mutex_lock(&tpx->mutex); 
     while(tpx->newq == 0){ 
      pthread_cond_wait(&tpx->go, &tpx->mutex); 
     } 
     printf("[t] to process : %d\n", tpx->qi); fflush(stdout); 
     // now i have a new value in qi, process it and place the answer in... res 
     tpx->res = longrun(tpx->qi); 
     tpx->newq = 0; 
     pthread_mutex_unlock(&tpx->mutex); 
     pthread_cond_signal(&tpx->ready); 
    } 
} 


int main(int argc, char* argv[]){ 

    cout << "\n this presents the problem: idx = 4k -> hang case ...\n (challenge is to eliminate them by killing thread and restarting it)\n\n"; 
    printf(" ETIMEDOUT = %d EINVAL = %d EPERM = %d\n\n", ETIMEDOUT, EINVAL, EPERM); 

    tpack_t* tpx = new tpack_t(); 
    pthread_create(&tpx->thread, NULL, &faulty_proc, (void*) tpx); 

    // max wait time; more than that is a hanging indication! 
    int numproc = 5; 
    ++numproc; 
    int tmax = 3; 
    timespec cond_time; 
    cond_time.tv_nsec = 0; 
    int status, expired; // for timed wait on done condition! 


    time_t t0 = time(NULL); 
    for(int i=1; i<numproc; ++i){ 
     expired = 0; 

     pthread_mutex_lock(&tpx->mutex); 
     tpx->qi = i; // init the question 
     tpx->newq = 1; // ... predicate 
     //pthread_mutex_unlock(&tpx->mutex); 
     pthread_cond_signal(&tpx->go); // let it know that... 

     while(tpx->newq == 1){ 

      /// ---------------------- most amazing region, timedwait waits all the way! ---------------------- 
      set_cond_time(&cond_time, tmax); // time must be FROM NOW! (abs time, not interval) 
      time_t wt0 = time(NULL); 
      status = pthread_cond_timedwait(&tpx->ready, &tpx->mutex, &cond_time); 
      printf("[m] ---- \t exited with status = %d (after %.2fs)\n", status, difftime(time(NULL), wt0)); 
      /// ----------------------------------------------------------------------------------------------- 

      if (status == ETIMEDOUT){ 
       printf("\t ['t was and newq == %d]\n", tpx->newq); 
       if(tpx->newq == 1){ // check one more time, to elim race possibility 
        expired = 1; 
        break; 
       } 
      } 
      else if(status != 0){ 
       fprintf(stderr, "cond timewait for faulty to reply errored out\n"); 
       return 1; 
      } 
     } 

     if(expired){ 
      take_faulty_evasive_action(tpx); // kill thread, start new one, report failure below 
      cout << "[m] :: interruption: default bad answer goes here for " << i << "\n\n"; 
     } 
     else { 
      cout << "[m] :: end with ans: " << tpx->res << endl << endl; 
     } 
     pthread_mutex_unlock(&tpx->mutex); 
    } 
    time_t t1 = time(NULL); 
    printf("took %.2f sec to run\n", difftime(t1, t0)); 
} 

用'g ++ -pthread code.cc'在linux下編譯(ubuntu 16.04)。輸出是:

this presents the problem: idx = 4k -> hang case ... 
    (challenge is to eliminate them by killing thread and restarting it) 

    ETIMEDOUT = 110 EINVAL = 22 EPERM = 1 

[m] ... set to sleep for 3 sec, i hope... 
[t] to process : 1 
[m] ----  exited with status = 0 (after 1.00s) 
[m] :: end with ans: 'okay to: 1' 

[m] ... set to sleep for 3 sec, i hope... 
[t] to process : 2 
[m] ----  exited with status = 0 (after 1.00s) 
[m] :: end with ans: 'okay to: 2' 

[m] ... set to sleep for 3 sec, i hope... 
[t] to process : 3 
[m] ----  exited with status = 0 (after 1.00s) 
[m] :: end with ans: 'okay to: 3' 

[m] ... set to sleep for 3 sec, i hope... 
[t] to process : 4 
[m] ----  exited with status = 110 (after 7.00s) 
    ['t was and newq == 0] 
[m] :: end with ans: '---- to: 4' (hang case) 

[m] ... set to sleep for 3 sec, i hope... 
[t] to process : 5 
[m] ----  exited with status = 0 (after 1.00s) 
[m] :: end with ans: 'okay to: 5' 

took 11.00 sec to run 

回答

0

的問題是,faulty_proc()保持tpx->mutex鎖定,同時它調用longrun(),並pthread_cond_timedwait()呼叫main()不能返回,直到它可以重新獲得互斥體,即使超時。

如果longrun()不需要互斥鎖被鎖定 - 而且似乎是這種情況 - 您可以在設置完成標誌並指示條件變量之前解鎖該調用的互斥鎖並重新鎖定它。

+0

對,爲了獲得互斥量,它必須在第一位!現在我可以進一步調整以完成剩下的工作...... – vuvu

相關問題