/*
 * kword_finder.c
 *
 * Counts how often each whitespace-delimited word occurs after the "Body:"
 * marker of the e-mails under data/emails/ whose flag in the reference file
 * is zero, and writes the counts, highest first, to "total_list_ok".
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <dirent.h>
#include <stdlib.h>
#include <stdbool.h>
#include <limits.h>
/* Size bound used for a single whitespace-delimited token read from an e-mail. */
#define WORD_MAX 1000
/* Number of e-mails considered; they are opened as data/emails/0 .. NR_FILES-1. */
#define NR_FILES 150
/* Upper bound on the number of distinct words the keyword table can hold. */
#define NR_WORDS 100000000
/* One distinct word together with the number of times it has been counted. */
typedef struct keyword {
    long long count; /* occurrences recorded so far */
    char *word;      /* NUL-terminated text of the word */
} keyword;
/*
 * Stringification helpers, used to embed WORD_MAX as a scanf field width.
 * This requires WORD_MAX to expand to a plain decimal integer literal.
 */
#define KW_STR_(x) #x
#define KW_STR(x) KW_STR_(x)
/* Reads at most WORD_MAX characters per token; longer tokens are split. */
#define KW_TOKEN_FMT "%" KW_STR(WORD_MAX) "s"

/* Number of table slots allocated the first time a word is stored. */
enum { KW_INITIAL_CAP = 1024 };

static const char KW_REF_PATH[] =
    "tasks/spam_detector/tests/01-spam_detector/01-spam_detector.ref";
static const char KW_OUT_PATH[] = "total_list_ok";

/* Growable table of keywords; `items[0..len)` are valid, `cap` slots exist. */
typedef struct {
    keyword *items;
    size_t len;
    size_t cap;
} keyword_list;

/*
 * Records one occurrence of `word`, matching existing entries
 * case-insensitively and keeping the spelling that was seen first.
 * Returns 0 on success, -1 if the table is full (NR_WORDS entries) or
 * memory could not be allocated.
 */
static int kw_list_add(keyword_list *kl, const char *word)
{
    for (size_t i = 0; i < kl->len; i++) {
        if (strcasecmp(word, kl->items[i].word) == 0) {
            kl->items[i].count++;
            return 0;
        }
    }

    if (kl->len == kl->cap) {
        const size_t limit = (size_t)NR_WORDS;
        if (kl->cap >= limit) {
            return -1;
        }
        size_t new_cap = kl->cap == 0 ? (size_t)KW_INITIAL_CAP : kl->cap * 2;
        /* Clamp to the hard limit; also catches wrap-around of the doubling. */
        if (new_cap > limit || new_cap < kl->cap) {
            new_cap = limit;
        }
        if (new_cap > (size_t)-1 / sizeof *kl->items) {
            return -1;
        }
        /* Keep the old pointer until realloc is known to have succeeded. */
        keyword *grown = realloc(kl->items, new_cap * sizeof *kl->items);
        if (!grown) {
            return -1;
        }
        kl->items = grown;
        kl->cap = new_cap;
    }

    /* Store an exact-size copy instead of a WORD_MAX-byte buffer per word. */
    size_t bytes = strlen(word) + 1;
    char *copy = malloc(bytes);
    if (!copy) {
        return -1;
    }
    memcpy(copy, word, bytes);

    kl->items[kl->len].word = copy;
    kl->items[kl->len].count = 1;
    kl->len++;
    return 0;
}

/* Releases every stored word and the table itself. */
static void kw_list_free(keyword_list *kl)
{
    for (size_t i = 0; i < kl->len; i++) {
        free(kl->items[i].word);
    }
    free(kl->items);
    kl->items = NULL;
    kl->len = 0;
    kl->cap = 0;
}

/*
 * Skips tokens up to and including the first "Body:" token of `email`, then
 * counts every remaining token in `kl`.
 * Returns 0 on success, -1 if a word could not be stored.
 */
static int kw_count_email(FILE *email, keyword_list *kl)
{
    char token[WORD_MAX + 1]; /* +1: KW_TOKEN_FMT stores WORD_MAX chars + NUL */
    bool in_body = false;

    while (fscanf(email, KW_TOKEN_FMT, token) == 1) {
        if (!in_body) {
            if (strcmp(token, "Body:") == 0) {
                in_body = true;
            }
            continue;
        }
        if (kw_list_add(kl, token) != 0) {
            return -1;
        }
    }
    return 0;
}

/*
 * Orders `items` by descending count.
 * The exchange sort is kept on purpose: qsort() leaves the relative order of
 * equal counts unspecified, which would change the order of tied words in
 * the output file.
 */
static void kw_sort_desc(keyword *items, size_t len)
{
    for (size_t i = 0; i + 1 < len; i++) {
        for (size_t j = i + 1; j < len; j++) {
            if (items[i].count < items[j].count) {
                keyword tmp = items[i];
                items[i] = items[j];
                items[j] = tmp;
            }
        }
    }
}

/*
 * Writes one "<count> <word>" line per keyword to `path`.
 * Returns 0 on success, -1 if the file could not be opened, written or closed.
 */
static int kw_list_write(const keyword_list *kl, const char *path)
{
    FILE *out = fopen(path, "w");
    if (!out) {
        return -1;
    }

    int rc = 0;
    for (size_t i = 0; i < kl->len; i++) {
        if (fprintf(out, "%lld %s\n", kl->items[i].count, kl->items[i].word) < 0) {
            rc = -1;
            break;
        }
    }
    /* fclose flushes buffered output, so its result matters for a write stream. */
    if (fclose(out) != 0) {
        rc = -1;
    }
    return rc;
}

/*
 * Builds a word-frequency list from the e-mails whose flag in the reference
 * file is zero and writes it, most frequent first, to "total_list_ok".
 *
 * The reference file must hold one integer per e-mail index 0..NR_FILES-1;
 * e-mails with a non-zero flag are skipped. An e-mail that cannot be opened
 * is reported on stderr and skipped.
 *
 * Returns EXIT_SUCCESS on success, or EXIT_FAILURE if the reference file is
 * missing or too short, memory runs out, or the output cannot be written.
 */
int main(void)
{
    int rc = EXIT_FAILURE;
    keyword_list kl = { NULL, 0, 0 };

    FILE *ref = fopen(KW_REF_PATH, "r");
    if (!ref) {
        perror(KW_REF_PATH);
        return EXIT_FAILURE;
    }
    printf("x\n"); /* progress marker: reference file opened */

    for (long long i = 0; i < NR_FILES; i++) {
        int spam = 0;
        if (fscanf(ref, "%d", &spam) != 1) {
            fprintf(stderr, "%s: expected %d flags, found only %lld\n",
                    KW_REF_PATH, NR_FILES, i);
            goto out;
        }
        if (spam) {
            continue;
        }

        char path[PATH_MAX];
        snprintf(path, sizeof path, "data/emails/%lld", i);
        FILE *email = fopen(path, "r");
        if (!email) {
            perror(path);
            continue;
        }
        printf("%lld\n", i); /* progress: index of the e-mail being counted */

        int failed = kw_count_email(email, &kl);
        fclose(email);
        if (failed) {
            fprintf(stderr, "%s: keyword table full or out of memory\n", path);
            goto out;
        }
    }
    printf("x\n"); /* progress marker: all e-mails read */

    kw_sort_desc(kl.items, kl.len);

    if (kw_list_write(&kl, KW_OUT_PATH) != 0) {
        fprintf(stderr, "%s: could not write keyword list\n", KW_OUT_PATH);
        goto out;
    }
    rc = EXIT_SUCCESS;

out:
    kw_list_free(&kl);
    fclose(ref);
    return rc;
}