1
#include <stdio.h>
#include <pthread.h>
#include <stdlib.h>
struct thread_data {
FILE *fp;
long int offset;
int start;
int blockSize;
//struct word maybe?
};
int words = 0;
void *countFrequency(void* data) {
struct thread_data* td = data;
char *buffer = malloc(td->blockSize);
int i, c;
i = 0; c = 0;
enum states { WHITESPACE, WORD };
int state = WHITESPACE;
fseek(td->fp, td->offset, td->start);
char last = ' ';
while ((fread(buffer, td->blockSize, 1, td->fp)) == 1) {
if (buffer[0]== ' ' || buffer[0] == '\t') {
state = WHITESPACE;
} else if (buffer[0] == '\n') {
//newLine++;
state = WHITESPACE;
} else {
if (state == WHITESPACE) {
words++;
}
state = WORD;
}
last = buffer[0];
}
free(buffer);
pthread_exit(NULL);
return NULL;
}
int main(int argc, char **argv) {
int nthreads, x, id, blockSize, len;
//void *state;
FILE *fp;
pthread_t *threads;
fp = fopen("file1.txt", "r");
printf("Enter the number of threads: ");
scanf("%d", &nthreads);
struct thread_data data[nthreads];
threads = malloc(nthreads * sizeof(pthread_t));
fseek(fp, 0, SEEK_END);
len = ftell(fp);
printf("len= %d\n", len);
blockSize = (len + nthreads - 1)/nthreads;
printf("size= %d\n", blockSize);
for (id = 0; id < nthreads; id++) {
data[id].fp = fp;
data[id].offset = blockSize;
data[id].start = id * blockSize + 1;
//maybe data[id]. word struct
}
//LAST THREAD
data[nthreads-1].start=(nthreads-1)*blockSize+1;
for (id = 0; id < nthreads; id++)
pthread_create(&threads[id], NULL, &countFrequency,&data[id]);
for (id = 0; id < nthreads; id++)
pthread_join(threads[id],NULL);
fclose(fp);
printf("%d\n",words);
return 0;
}
我有一個分段故障,我在這個程序中修復,但現在當我運行它時,我得到0個字,這是不正確的,因爲在文本文件中有大約一百萬字。爲什麼我的程序不能輸出正確的字數?
誰能告訴我爲什麼它給我一個不正確的字數?
@jgabb您在countFrequency()中的fseek不正確......請查看手冊頁的第三個參數。 – TonyB 2014-12-03 03:46:02
至於countFrequency中的文件描述符,它是否應該使用相同的文件描述符,因爲它指向同一個文件? – jgabb 2014-12-03 04:53:18
@jgabb照片3線程...第一個從偏移量1開始,第二個從偏移量3001開始,第三個從偏移量6001開始。每個讀取3000個字節。線程之一做一個fseek()來抵消1,但在它可以讀取線程2接管之前。然後它做一個fseek()來抵消3001,但在它讀取之前,線程3接管。線程3 fseek()到6001 ...現在線程再次得到控制,它認爲它在偏移量1,實際上它是在偏移量6001 ...你看到問題...你需要同步訪問,或者將整個文件讀入內存並按照這種方式進行處理。 – TonyB 2014-12-03 04:59:43