-
Notifications
You must be signed in to change notification settings - Fork 39
/
Copy pathanagram_having_given_hash.py
125 lines (102 loc) · 3.62 KB
/
anagram_having_given_hash.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#!/usr/bin/python
# Date: 2018-10-10
#
# Description:
# Given a string and 3 md5 hashes, find the anagram of given string which has
# given md5 hash.
# You are also given a wordlist file having around 0.1 million words, which
# might help you out.
# This is the wordlist:
# https://drive.google.com/file/d/13qjOPs4R6nHZcKyArRHHZZl62nQf8AON/view?usp=sharing
#
# Approach:
# - Filter words from wordlist file given, which can't be anagram.
# - Find the length of maximum string left in wordlist after filtering, it
# comes out to be 11.
# - We know length of input string is 18(20 - 2) so we find all triplets which
# sums to 18 and compute all combinations.
#
# Solution:
# printout stout yawls
# ty outlaws printouts
import collections
import hashlib
def is_valid(word, char_freq):
"""
Checks whether given word has character counts not more than counts given in
dictionary.
Args:
word: A word from wordlist.
char_freq: Dictionary with frequency of each character in input string.
"""
char_count = collections.defaultdict(int)
for c in word:
char_count[c] += 1
for c in char_count:
if char_count[c] > char_freq[c]:
return False
return True
def get_valid_words(char_count):
"""
Reads wordlist and filter words which can be part of anagram of given string.
Args:
char_count: Dictionary with frequency of each character in input string.
"""
with open('/home/hansraj/Downloads/wordlist') as fp:
valid_words = []
word = fp.readline().strip()
while word:
if is_valid(word, char_count):
valid_words.append(word)
word = fp.readline()
if word:
word = word.strip()
return valid_words
def string_anagram_with_given_md5hash(string, length, expected_hash):
"""Finds anagram of given string which has expected md5 hash.
Args:
string: Input string.
length: Length of input string.
expected_hash: Tuple with expected hash values.
"""
# Compute count of each character in input string.
char_count = collections.defaultdict(int)
for c in string:
char_count[c] += 1
valid_words = get_valid_words(char_count)
# Create a dictionary to group all words with same length.
word_lengths = collections.defaultdict(list)
for word in valid_words:
word_lengths[len(word)].append(word)
# We are left with 1788 words from wordlist which can be part of anagram of
# given string.
# Also strings with which we are left with has lengths from 1 to 11.
# And length of given string 'poultry outwits ants' is 20 and if we exclude
# 2 spaces we are left with string length of 18. So effectively now we have
# to generate a string which has length of 18 using 1788 words.
triplets_with_sum_18 = []
max_len = max(l for l in word_lengths) # 11
# Find triplets which sums to 18, using numbers from 1 to 11.
for i in range(1, max_len + 1):
for j in range(1, max_len + 1):
for k in range(1, max_len + 1):
if i + j + k == length - 2: # -2 for 2 spaces
triplets_with_sum_18.append((i, j, k))
# Iterate over all triplets and check for strings having required lengths.
for i, j, k in triplets_with_sum_18:
for s1 in word_lengths[i]:
for s2 in word_lengths[j]:
for s3 in word_lengths[k]:
string = s1 + ' ' + s2 + ' ' + s3
if hashlib.md5(string).hexdigest() in expected_hash:
print(string)
def main():
string = 'poultry outwits ants'
hash_values = (
'e4820b45d2277f3844eac66c903e84be',
'23170acc097c24edb98fc5488ab033fe',
'665e5bcb0c20062fe8abaaf4628bb154'
)
string_anagram_with_given_md5hash(string, len(string), hash_values)
if __name__ == '__main__':
main()