-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathp220.erl
80 lines (65 loc) · 2.5 KB
/
p220.erl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
-module(p220).
-export([split/1, splitAll/3, indexing/1]).
%% Drop every empty list from the first argument and push the remaining
%% elements onto the accumulator. Because survivors are consed onto the
%% front, they come out in the reverse of their input order; split/2 builds
%% its word list newest-first, so this reversal puts the words back in text
%% order.
rm([], Acc) ->
    Acc;
rm([[] | Rest], Acc) ->
    rm(Rest, Acc);
rm([Item | Rest], Acc) ->
    rm(Rest, [Item | Acc]).
%% Split a text string into its words, lower-cased.
%% A word is a maximal run of ASCII letters (A-Z, a-z); every other character
%% (digits, punctuation, whitespace, anything outside those two ranges) acts
%% as a separator. Example: split("Hello, World") returns ["hello","world"].
split(L) ->
    split(L, [[]]).

%% Worker for split/1. The accumulator is [Current | Done]: Current is the
%% word being built and Done the finished words, newest first. Each word is
%% held with its letters reversed so that a letter is added by consing onto
%% the front instead of appending to the end of the word.
split([], Words) ->
    %% rm/2 drops the empty entries and restores text order of the words;
    %% each word's letters are then put back in reading order.
    [lists:reverse(Word) || Word <- rm(Words, [])];
split([C | Rest], [Current | Done]) when C >= $A, C =< $Z ->
    %% Upper-case letter: store its lower-case counterpart.
    split(Rest, [[C + ($a - $A) | Current] | Done]);
split([C | Rest], [Current | Done]) when C >= $a, C =< $z ->
    split(Rest, [[C | Current] | Done]);
split([_ | Rest], [[] | _] = Words) ->
    %% Separator while no word is in progress: nothing to close.
    split(Rest, Words);
split([_ | Rest], [_ | _] = Words) ->
    %% Separator right after a word: close it by opening a fresh empty word.
    split(Rest, [[] | Words]).
%% Split every text line of a file into words and number the lines.
%% The first argument is the list of text lines, Rs is a result list to
%% extend, and I is the number given to the first line. Returns Rs followed
%% by one {LineNumber, Words} tuple per line, in input order, where Words is
%% split/1 applied to that line and LineNumber counts up from I.
splitAll([], Rs, _) ->
    Rs;
splitAll(Lines, Rs, I) ->
    %% Number the lines in a single pass and append to Rs once, rather than
    %% re-copying the growing result for every line.
    {Numbered, _Next} =
        lists:mapfoldl(fun(Line, N) -> {{N, split(Line)}, N + 1} end, I, Lines),
    Rs ++ Numbered.
%% Record a line number against a word if the word occurs in that line.
%% Arguments: the word W, a {Line, Words} pair, and the entry {W, Lines}
%% collected so far. Returns the entry with Line appended (once, however many
%% times the word occurs in the line). The entry is returned unchanged when
%% the word is absent from the line or when the entry belongs to another word.
checkAppearance(W, {Line, Words}, {W, Lines} = Entry) ->
    case lists:member(W, Words) of
        true -> {W, Lines ++ [Line]};
        false -> Entry
    end;
checkAppearance(_, {_, Words}, Entry) when is_list(Words) ->
    Entry.
%% Tell whether a word already has an entry in a list of {Word, Lines}
%% tuples. Returns true when some two-element tuple in the list has exactly
%% W as its first element, false otherwise (including for the empty list).
checkIfChecked(W, Entries) ->
    lists:any(fun({Word, _}) -> Word =:= W;
                 (_) -> false
              end,
              Entries).
%% Look for the word W in each of the given {Line, Words} pairs, in order,
%% threading the entry Rs through checkAppearance/3 so that the number of
%% every line containing the word is added to it. Returns the final entry.
checkWordAllLines(W, Lines, Rs) ->
    lists:foldl(fun(Line, Entry) -> checkAppearance(W, Line, Entry) end,
                Rs,
                Lines).
%% Build index entries for the words of one line.
%% Arguments: the {Line, Words} pair being processed, the list of other
%% {Line, Words} pairs to search, and the entries Rs gathered so far.
%% Each word that has no entry yet gets one, seeded with this line's number
%% and extended by checkWordAllLines/3; a word that already has an entry
%% (for instance because it is repeated within the line) is skipped.
%% Returns Rs with the new entries appended in order of first occurrence.
checkAllWords({Line, Words}, ListOfLines, Rs) ->
    AddWord =
        fun(W, Entries) ->
            case checkIfChecked(W, Entries) of
                true ->
                    Entries;
                false ->
                    Entries ++ [checkWordAllLines(W, ListOfLines, {W, [Line]})]
            end
        end,
    lists:foldl(AddWord, Rs, Words).
%% Sort {Word, Lines} entries by Word in ascending term order, quicksort
%% style with the first entry as pivot.
%% Entries whose word equals the pivot's go into the left partition, so among
%% entries for the same word the ones later in the input end up BEFORE the
%% earlier ones. indexing/1 feeds the result to rmdup/2, which keeps the last
%% entry of each word, so this ordering of equal words matters: do not swap
%% in a stable sort without revisiting rmdup/2.
%% Elements of the tail that are not two-element tuples are dropped by the
%% comprehension patterns.
sort([]) ->
    [];
sort([{PivotWord, _} = Pivot | Rest]) ->
    NotAfter = [Entry || {Word, _} = Entry <- Rest, Word =< PivotWord],
    After = [Entry || {Word, _} = Entry <- Rest, Word > PivotWord],
    sort(NotAfter) ++ [Pivot | sort(After)].
%% Build the raw index for a list of {Line, Words} pairs.
%% Each line is processed in turn with checkAllWords/3, searching only the
%% lines that come after it. A word therefore gets one entry per line it
%% occurs in: the entry made at its first line lists all of its lines, and
%% entries made at later lines list only the lines from there on.
%% Returns Rs followed by all of those entries.
checkAllLines([], Index) ->
    Index;
checkAllLines([Current | Later], Index) ->
    Entries = checkAllWords(Current, Later, []),
    checkAllLines(Later, Index ++ Entries).
%% Tell whether a list of entries contains one for the same word as the
%% given {Word, Lines} entry. Only the word is compared (exactly); the line
%% lists are ignored. A first argument that is not a two-element tuple is
%% never considered a member.
mem({Word, _}, Entries) ->
    lists:any(fun({Other, _}) -> Other =:= Word;
                 (_) -> false
              end,
              Entries);
mem(_, Entries) when is_list(Entries) ->
    false.
%% Remove duplicate words from a list of {Word, Lines} entries.
%% An entry is dropped when an entry for the same word (see mem/2) appears
%% later in the list, so for each word only its LAST entry survives.
%% Returns Rs followed by the surviving entries in their original order.
rmdup([], Kept) ->
    Kept;
rmdup([Entry | Later], Kept) ->
    Next =
        case mem(Entry, Later) of
            true -> Kept;
            false -> Kept ++ [Entry]
        end,
    rmdup(Later, Next).
%% Build the word index of a file.
%% Steps: read the file through index:get_file_contents/1 (defined outside
%% this module; presumably it returns the file's lines as a list of strings,
%% verify against that module), split and number the lines starting at 1,
%% collect per-line entries, sort them by word and keep one entry per word.
%% Returns a list of {Word, LineNumbers} tuples ordered by word.
indexing(FileName) ->
    Contents = index:get_file_contents(FileName),
    NumberedLines = splitAll(Contents, [], 1),
    RawEntries = checkAllLines(NumberedLines, []),
    SortedEntries = sort(RawEntries),
    rmdup(SortedEntries, []).
%indexing(Name)->
% Contents=get_file_contents(Name),