/*! \file subbreak3.h
 *  \brief Breaking non-exact substitution ciphers
 *  \author Georgi Gerganov
 */
#pragma once
#include "common.h"

#include <cmath>
#include <cstdint>
#include <map>
#include <string>
#include <unordered_map>
#include <vector>
namespace Cipher {
// Integer code type: returned by calcCode() and used as the key of TFreqMap::prob.
using TCode = int32_t;
// Probability value type: used for TFreqMap::pmin, the mapped values of TFreqMap::prob and TResult::p.
using TProb = float;
// Type of TFreqMap::len (presumably the n-gram length - TODO confirm).
using TGramLen = int32_t;
// Hint container: stored in TParameters::hint and passed to decode(), printDecoded() and Processor::setHint().
// NOTE(review): the meaning of the individual int32_t entries is not visible in this header.
using THint = std::vector<int32_t>;
// Sequence of TLetter values. TLetter is not declared in this file (presumably it comes from common.h).
using TPlainText = std::vector<TLetter>;
// The three aliases below are not referenced anywhere else in the visible part of this header.
// NOTE(review): the names suggest per-letter counts, per-cluster counts and per-cluster position lists - confirm against the implementation.
using TLetterCount = std::vector<int>;
using TClusterCount = std::vector<int>;
using TClusterPos = std::vector<std::vector<int>>;
/*! \brief Tunable settings shared by the clustering, annealing and beam-search routines.
 *
 * Every member has a default value, so a default-constructed TParameters is usable as-is.
 * NOTE(review): the per-field remarks below are inferred from the member names and the
 * section comments; confirm the exact semantics against the implementation.
 */
struct TParameters {
    // clustering params
    int minClusters = 23;       //!< presumably the smallest cluster count considered
    int maxClusters = 32;       //!< presumably the largest cluster count considered
    int nInitialIters = 1000;
    int nIters = 100;
    double fSpread = 1.0;

    // simulated annealing params
    // float literals (f suffix, like wEnglishFreq below) so that no implicit
    // double -> float conversion happens in the initializers
    float temp0 = 0.0001f;
    float coolingRate = 0.95f;

    // language model
    float wEnglishFreq = 10.0f;

    // beam search
    int nHypothesesToKeep = 500;

    THint hint = {};            //!< empty by default
};
/*! \brief Frequency table that loadFreqMap() / loadFreqMapBinary() write into and
 *         saveFreqMapBinary() reads from.
 */
struct TFreqMap {
    //! Defaults to -1. Presumably the n-gram length once a table is loaded - TODO confirm.
    TGramLen len = -1;

    //! Total count, 0 by default. NOTE(review): what exactly is counted is not visible in this header.
    int64_t nTotal = 0;

    //! Probability floor, 0 by default (see the pmin argument of loadFreqMap()).
    TProb pmin = 0.0f;

    //! Probability value stored per TCode key.
    std::unordered_map<TCode, TProb> prob;
};
/*! \brief Result record: updated in place by beamSearch() / refineNearby() and
 *         returned by Processor::getResult().
 *
 * Both score members start at -999.
 * NOTE(review): -999 looks like a "not computed yet" sentinel - confirm.
 */
struct TResult {
    int32_t id = 0;

    //! Score of type TProb. NOTE(review): presumably the language-model score of the decoding - confirm.
    TProb p = -999.0f;

    //! Same type as the value returned by calcPClusters(); presumably stores that value - confirm.
    double pClusters = -999.0;

    TClusterToLetterMap clMap;  //!< cluster -> letter assignment (type declared outside this file)
    TClusters clusters;         //!< clustering the assignment refers to (type declared outside this file)
};
// Computes a TCode from the character buffer `data` and the integer `n`.
// NOTE(review): presumably `n` is the number of characters (n-gram length) to encode - confirm.
TCode calcCode(const char * data, int n);
// Loads a frequency map from the file `fname` into `res` (passed by non-const reference).
// n-grams with lower probability than pmin are assigned cost = log10(pmin)
// Returns bool; presumably true on success - TODO confirm.
bool loadFreqMap(const char * fname, TFreqMap & res, double pmin = 0.000001);
// Binary counterparts: save takes `res` by const reference, load takes it by non-const reference.
// Both return bool; presumably true on success - TODO confirm.
bool saveFreqMapBinary(const char * fname, const TFreqMap & res);
bool loadFreqMapBinary(const char * fname, TFreqMap & res);
// Takes the settings `params` and the input `text`, and has `clusters` as its only non-const argument.
// NOTE(review): presumably produces an exact substitution encoding of `text` in `clusters` - confirm.
// Returns bool; presumably true on success - TODO confirm.
bool encryptExact(const TParameters & params, const std::string & text, TClusters & clusters);
// Search step with the same signature as refineNearby(): reads `params` and `freqMap`,
// and takes `result` by non-const reference (the only argument it can modify).
// NOTE(review): presumably `result` is both the starting point and the output - confirm.
// Returns bool; presumably true on success - TODO confirm.
bool beamSearch(
const TParameters & params,
const TFreqMap & freqMap,
TResult & result);
// Refinement step with the same signature as beamSearch(): reads `params` and `freqMap`,
// and takes `result` by non-const reference (the only argument it can modify).
// Returns bool; presumably true on success - TODO confirm.
bool refineNearby(
const TParameters & params,
const TFreqMap & freqMap,
TResult & result);
// Reads `params` and the similarity map `ccMap`; `clusters` is the only non-const argument.
// NOTE(review): presumably writes an initial clustering into `clusters` - confirm.
// Returns bool; presumably true on success - TODO confirm.
bool generateClustersInitialGuess(
const TParameters & params,
const TSimilarityMap & ccMap,
TClusters & clusters);
// Takes `clusters` by non-const reference, so it can modify them in place, using the settings in `params`.
// Returns bool; NOTE(review): the meaning of the return value is not visible here.
bool mutateClusters(const TParameters & params, TClusters & clusters);
// Returns a double computed from the given clustering and cluster-to-letter map; all arguments are const.
// The first two parameters are unnamed in this declaration.
// NOTE(review): by position the unnamed TSimilarityMap mirrors `ccMap` in normalizeSimilarityMap() - confirm.
// NOTE(review): the return type matches TResult::pClusters; presumably that is where the value is stored - confirm.
double calcPClusters(
const TParameters & ,
const TSimilarityMap & ,
const TSimilarityMap & logMap,
const TSimilarityMap & logMapInv,
const TClusters & clusters,
const TClusterToLetterMap & clMap);
// Takes all three similarity maps by non-const reference, so each of them can be modified.
// The TParameters argument is unnamed in this declaration.
// NOTE(review): presumably normalizes `ccMap` and fills `logMap` / `logMapInv` from it - confirm.
// Returns bool; presumably true on success - TODO confirm.
bool normalizeSimilarityMap(
const TParameters & ,
TSimilarityMap & ccMap,
TSimilarityMap & logMap,
TSimilarityMap & logMapInv);
// Maps a TClusterId to a char. NOTE(review): presumably the printable symbol used by printEncoded() - confirm.
char getEncodedChar(TClusterId);
// Returns the TLetter for position `idx` of `t`, given the cluster-to-letter map `clMap` and `hint`.
// NOTE(review): how `hint` influences the result is not visible in this header.
TLetter decode(const TClusters & t, int idx, const TClusterToLetterMap & clMap, const THint & hint);
// Print helpers; all take their arguments by const reference and return nothing.
// NOTE(review): the output destination (presumably stdout) is not visible in this header.
void printEncoded(const TClusters & t);
void printDecoded(const TClusters & t, const TClusterToLetterMap & clMap, const THint & hint);
void printPlain(const std::vector<TLetter> & t);
/*! \brief Stateful wrapper that keeps the parameters, similarity maps and current result
 *         between calls.
 *
 * NOTE(review): the intended call order is presumably init() -> compute() (repeatedly) ->
 * getResult(); confirm against the implementation.
 */
class Processor {
public:
    Processor();

    //! Returns bool; presumably true on success - TODO confirm.
    bool init(const TParameters & params, const TFreqMap & freqMap, const TSimilarityMap & similarityMap);

    bool setHint(const THint & hint);

    //! Returns a vector of TResult by value.
    std::vector<TResult> getClusterings(int nClusterings);

    bool compute();

    //! Returns the current value of m_nInitialIters.
    int getIters() const {
        return m_nInitialIters;
    }

    const TResult & getResult() const;
    const TSimilarityMap & getSimilarityMap() const;

private:
    TParameters m_params;

    //! Null until set. NOTE(review): presumably points at the freqMap passed to init(); if so,
    //! that object has to outlive this Processor - confirm.
    const TFreqMap * m_freqMap = nullptr;

    TSimilarityMap m_similarityMap;
    TSimilarityMap m_logMap;
    TSimilarityMap m_logMapInv;

    int m_nInitialIters = 0;    //!< value reported by getIters()

    double m_pCur = 0.0;
    double m_pZero = 0.0;

    TResult m_curResult;
};
// Declaration only. Judging by the name and parameters, it returns a cutoff frequency chosen from the
// range [minCutoffFreq_Hz, maxCutoffFreq_Hz] scanned in increments of step_Hz for the filter `filterId`
// applied to `waveform` at `sampleRate` - TODO confirm against the definition.
// NOTE(review): TWaveformF and EAudioFilter are not declared in this file (presumably common.h), and this
// audio helper is declared inside namespace Cipher - confirm that this placement is intended.
float findBestCutoffFreq(
const TWaveformF & waveform,
EAudioFilter filterId,
int64_t sampleRate,
float minCutoffFreq_Hz,
float maxCutoffFreq_Hz,
float step_Hz);
}