-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbrute.py
30 lines (23 loc) · 1.05 KB
/
brute.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import re
from charset_normalizer import from_bytes
# Mojibake repair script: decode 'entrada.csv' with a detected encoding,
# substitute known garbled character sequences, write the result to
# 'saida.csv', then print every character in the output that falls outside
# the expected plain-ASCII set so remaining problems can be spotted.

dados_sujos = []
dados_limpos = []

# Matches any single character that is NOT in the allowed ASCII set.
regex = re.compile(r'[^a-zA-Z0-9!@#$%&*()/,:.\"\' \-_;]')

with open('entrada.csv', 'rb') as entrada_CSV:
    melhor_palpite = from_bytes(entrada_CSV.read()).best()

# best() yields None when no plausible encoding is found; str(None) would
# otherwise end up written to the output file as the literal text "None".
if melhor_palpite is None:
    raise SystemExit("entrada.csv: could not detect a text encoding")

dados_sujos.append(str(melhor_palpite))

# Garbled sequence -> intended character. Entries are applied in insertion
# order, and the order matters: e.g. 'ć' -> 'ç' runs before 'ńá' -> 'ć', so
# the 'ć' produced by the later entry is left alone.
# NOTE(review): the 'í' and 'á' entries look like no-ops; presumably key and
# value differ in Unicode normalization form — confirm before removing.
dicionario = {'č': 'ã', '√Ī': 'ñ', 'Ňü': 'ş', '√≥': 'ó', 'ć': 'ç',
              '√ď': 'Ó', '√ß': 'ç', '√ę': 'ë', '‚Äď': '–', 'ńá': 'ć',
              '√ľ': 'ü', 'ńü': 'ğ', '√Č': 'É', 'ź': 'ê', '√°': 'á',
              '‚ā™': '', '√∂': 'ö', '√į': 'ð', 'Ňě': 'Ş', '√≠': 'í',
              'í': 'í', '√•': 'å', 'á': 'á', '√®': 'è', '√©': 'é', 'ńĪ': 'ı'}

for linha in dados_sujos:
    temp = linha
    # str.replace is already a no-op when the key is absent, so no
    # membership test is needed before each substitution.
    for errado, certo in dicionario.items():
        temp = temp.replace(errado, certo)
    dados_limpos.append(temp)

# Explicit UTF-8: the replacement characters (ş, ğ, ı, ć, ...) cannot be
# encoded by legacy locale code pages, which open() would otherwise pick
# up as the platform default. newline='' keeps the input's own line endings
# instead of expanding '\n' to '\r\n' on Windows.
with open('saida.csv', 'w', encoding='utf-8', newline='') as out:
    out.writelines(dados_limpos)

# Re-read what was actually written and collect the distinct characters that
# are still outside the allowed set.
with open('saida.csv', 'r', encoding='utf-8') as output:
    verificacao = set(regex.findall(output.read()))

print(verificacao)