# generate_data_noise_models.py
# Forked from QEC-project-2020/EWD-QEC

import copy
import os
import sys
import time
import numpy as np
import pandas as pd

from src.toric_model import Toric_code
from src.planar_model import Planar_code
from src.mcmc import *
from decoders import *
from src.mwpm import *


# This function generates training data with the help of the MCMC algorithm
def generate(file_path, params, max_capacity=10**4, nbr_datapoints=10**6, fixed_errors=None):
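    """Generate equivalence-class distributions for random syndromes and store
    them in a pickled DataFrame.

    file_path      -- pickle file to create, or to append to if it exists
    params         -- dict of code, noise and decoder settings (see __main__)
    max_capacity   -- hard cap on the number of datapoints in the file
    nbr_datapoints -- number of datapoints to generate in this run
    fixed_errors   -- if not None, run until this many failed syndromes occur
    """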
    if params['code'] == 'planar':
        nbr_eq_class = 4
    elif params['code'] == 'toric':
        nbr_eq_class = 16
    if params['method'] == "all":
        nbr_eq_class *= 3

    # Opens the data file if there is one, otherwise creates a new one
    try:
        df = pd.read_pickle(file_path)
        nbr_existing_data = df.index[-1][0] + 1
    except Exception:
        df = pd.DataFrame()
        nbr_existing_data = 0
    print('\nDataFrame with ' + str(nbr_existing_data) +
          ' datapoints opened at: ' + str(file_path))

    # Stop the file from exceeding the max limit of nbr of datapoints
    nbr_to_generate = min(max_capacity - nbr_existing_data, nbr_datapoints)
    if nbr_to_generate < nbr_datapoints:
        print('Generating ' + str(max(nbr_to_generate, 0))
              + ' datapoints instead of ' + str(nbr_datapoints)
              + ', as the given number would overflow the existing file')

    # With a fixed number of errors, generate (effectively) until that many failures occur
    if fixed_errors is not None:
        nbr_to_generate = 10000000
    failed_syndroms = 0

    df_list = []  # Initiate temporary list

    # Loop to generate data points
    for i in range(nbr_existing_data, nbr_existing_data + nbr_to_generate):
        print('Starting generation of point nr: ' + str(i + 1))

        # Initiate code
        if params['code'] == 'toric':
            init_code = Toric_code(params['size'])
            init_code.generate_random_error(params['p_error'])
        elif params['code'] == 'planar':
            init_code = Planar_code(params['size'])
            if 'p_xyz' in params:
                init_code.generate_general_noise_error(params['p_xyz'])
            else:
                init_code.generate_random_error(params['p_error'])

        # Copy the initial qubit matrix to store in the dataframe
        df_qubit = copy.deepcopy(init_code.qubit_matrix)
        eq_true = init_code.define_equivalence_class()

        if params['mwpm_init']:  # Get MWPM starting points
            init_code = class_sorted_mwpm(init_code)
            print('Starting in MWPM state')
        else:  # Randomize input matrix, leaving no trace of the seed
            init_code.qubit_matrix, _ = init_code.apply_random_logical()
            init_code.qubit_matrix = init_code.apply_stabilizers_uniform()
            print('Starting in random state')
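        # NOTE (assumption): class_sorted_mwpm appears to return one code per
        # equivalence class, so init_code is a list from here on when
        # params['mwpm_init'] is True; the "uncorrelated_comparison" branch
        # below relies on this when it indexes init_code[0].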

        # Generate data for DataFrame storage
        # (NB: this currently stores the full distribution over equivalence classes; consider changing this)
        if params['method'] == "PTEQ":
            df_eq_distr = PTEQ(init_code, params['p_error'])
            if np.argmax(df_eq_distr) != eq_true:
                failed_syndroms += 1
                print('Failed syndrome, total now:', failed_syndroms)
        elif params['method'] == "PTDC":
            df_eq_distr, conv = PTDC(init_code, params['p_error'], params['p_sampling'])
            if np.argmax(df_eq_distr) != eq_true:
                failed_syndroms += 1
                print('Failed syndrome, total now:', failed_syndroms)
        elif params['method'] == "PTRC":
            df_eq_distr, conv = PTRC(init_code, params['p_error'], params['p_sampling'])
            if np.argmax(df_eq_distr) != eq_true:
                failed_syndroms += 1
                print('Failed syndrome, total now:', failed_syndroms)
        elif params['method'] == "STDC":
            df_eq_distr = STDC(init_code, params['size'], params['p_error'], params['p_sampling'], steps=params['steps'], droplets=params['droplets'])
            df_eq_distr = np.array(df_eq_distr)
            if np.argmax(df_eq_distr) != eq_true:
                failed_syndroms += 1
                print('Failed syndrome, total now:', failed_syndroms)
        elif params['method'] == "ST":
            df_eq_distr = single_temp(init_code, params['p_error'], params['steps'])
            df_eq_distr = np.array(df_eq_distr)
            # NB: for single_temp the smallest entry marks the most likely class, hence argmin
            if np.argmin(df_eq_distr) != eq_true:
                failed_syndroms += 1
                print('Failed syndrome, total now:', failed_syndroms)
        elif params['method'] == "STRC":
            df_eq_distr = STRC(init_code, params['size'], params['p_error'], p_sampling=params['p_sampling'], steps=params['steps'], droplets=params['droplets'])
            df_eq_distr = np.array(df_eq_distr)
            if np.argmax(df_eq_distr) != eq_true:
                failed_syndroms += 1
                print('Failed syndrome, total now:', failed_syndroms)
        elif params['method'] == "all":
            # init_code.qubit_matrix = init_code.apply_stabilizers_uniform()
            df_eq_distr1 = single_temp(init_code, params['p_error'], params['steps'])
            # init_code.qubit_matrix = init_code.apply_stabilizers_uniform()
            df_eq_distr2 = STDC(init_code, params['size'], params['p_error'], p_sampling=params['p_sampling'], steps=params['steps'], droplets=params['droplets'])
            # init_code.qubit_matrix = init_code.apply_stabilizers_uniform()
            df_eq_distr3 = STRC(init_code, params['size'], params['p_error'], p_sampling=params['p_sampling'], steps=params['steps'], droplets=params['droplets'])
            df_eq_distr = np.concatenate((df_eq_distr1, df_eq_distr2, df_eq_distr3), axis=0)
        elif params['method'] == "eMWPM":
            out = class_sorted_mwpm(copy.deepcopy(init_code))
            lens = np.zeros((4))
            for j in range(4):
                lens[j] = out[j].count_errors()
            choice = np.argmin(lens)
            df_eq_distr = np.zeros((4)).astype(np.uint8)
            df_eq_distr[choice] = 100
            if np.argmax(df_eq_distr) != eq_true:
                failed_syndroms += 1
                print('Failed syndrome, total now:', failed_syndroms)
        elif params['method'] == "MWPM":
            choice = regular_mwpm(copy.deepcopy(init_code))
            df_eq_distr = np.zeros((4)).astype(np.uint8)
            df_eq_distr[choice] = 100
            if np.argmax(df_eq_distr) != eq_true:
                failed_syndroms += 1
                print('Failed syndrome, total now:', failed_syndroms)
        elif params['method'] == "uncorrelated_comparison":
            mwpm_init = copy.deepcopy(init_code[0])
            mwpm_init.syndrom()
            mwpm_choice = regular_mwpm(mwpm_init)
            df_eq_distr1 = np.zeros((4)).astype(np.uint8)
            df_eq_distr1[mwpm_choice] = 100
            df_eq_distr2 = STDC_general_noise(init_code, params['p_xyz'], p_sampling=params['p_sampling'], steps=params['steps'], droplets=params['droplets'])
            if np.argmax(df_eq_distr2) != eq_true:
                failed_syndroms += 1
                print('Failed syndrome, total now:', failed_syndroms)
            df_eq_distr = np.concatenate((df_eq_distr1, df_eq_distr2), axis=0)
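
        # At this point df_eq_distr holds one score per equivalence class:
        # 4 entries for the planar code (16 for the toric code), or the
        # concatenation of several such vectors for the "all" and
        # "uncorrelated_comparison" methods.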

        # Create indices for generated data
        names = ['data_nr', 'type']
        index_qubit = pd.MultiIndex.from_product([[i], np.arange(1)], names=names)
        index_distr = pd.MultiIndex.from_product([[i], np.arange(1) + 1], names=names)

        # Add data to dataframes: row (i, 0) holds the qubit matrix,
        # row (i, 1) holds the equivalence-class distribution
        df_qubit = pd.DataFrame([[df_qubit.astype(np.uint8)]], index=index_qubit,
                                columns=['data'])
        df_distr = pd.DataFrame([[df_eq_distr]],
                                index=index_distr, columns=['data'])

        # Add dataframes to temporary list to shorten computation time
        df_list.append(df_qubit)
        df_list.append(df_distr)

        # Every x iterations, add the data from the temporary list to the data
        # file and clear the list. The interval needs to be big enough that
        # rsync has time to sync files before the next update; maybe change
        # this to be time-based instead.
        if (i + 1) % 10000 == 0:
            df = pd.concat([df] + df_list)  # DataFrame.append was removed in pandas 2.0
            df_list.clear()
            print('Intermediate save point reached (writing over)')
            df.to_pickle(file_path)
            print('Failed so far:', failed_syndroms)

        # If the desired number of errors has been reached, break the loop and finish up
        if failed_syndroms == fixed_errors:
            print('Desired number of failed syndromes achieved, breaking loop.')
            break

    # Add any remaining data from the temporary list to the data file when the run is over
    if len(df_list) > 0:
        df = pd.concat([df] + df_list)
    print('\nSaving all generated data (writing over)')
    df.to_pickle(file_path)
    print('\nCompleted')


if __name__ == '__main__':
    # Get job array id and working directory from the environment
    array_id = os.getenv('SLURM_ARRAY_TASK_ID')
    local_dir = os.getenv('TMPDIR')
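    # NOTE: both variables are assumed to be set, i.e. the script is expected
    # to run as a SLURM array job with TMPDIR pointing to a scratch directory;
    # otherwise the int(array_id) conversion below will fail.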

    # Map the array id to a code size (5, 7, 9, ...) and a grid of error rates
    size = 5 + 2 * (int(array_id) // 32)
    p_error = np.round(0.05 + (int(array_id) % 32) / 180, decimals=3)

    # Convert the total error rate to rates for uncorrelated X and Z noise:
    # two independent channels, each of strength p_uncorrelated, chosen so
    # that the probability of at least one error per qubit equals p_error
    p_uncorrelated = 1 - np.sqrt(1 - p_error)
    p_xz = p_uncorrelated * (1 - p_uncorrelated)  # P(X only) = P(Z only)
    p_y = p_uncorrelated ** 2                     # P(both X and Z) = P(Y)
    p_xyz = np.array([p_xz, p_y, p_xz])
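    # Sanity check (added): the outcome probabilities of the two independent
    # channels must sum to one, 2*P(X or Z alone) + P(Y) + P(no error) = 1
    assert np.isclose(2 * p_xz + p_y + (1 - p_uncorrelated) ** 2, 1.0)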
    print('size:', size)

    params = {'code': "planar",
              'method': "uncorrelated_comparison",
              'size': size,
              'p_error': p_error,
              'p_xyz': p_xyz,
              'p_sampling': 0.25,  # np.round((0.05 + float(array_id) / 50), decimals=2)
              'droplets': 1,
              'mwpm_init': True,
              'fixed_errors': None,
              'Nc': None,
              'iters': 10,
              'conv_criteria': 'error_based',
              'SEQ': 2,
              'TOPS': 10,
              'eps': 0.1}
    # The number of steps is a function of the code size L
    params.update({'steps': int(params['size'] ** 4)})
    print('Nbr of steps to take if applicable:', params['steps'])

    # Build file path
    file_path = os.path.join(local_dir, 'data_size_' + str(params['size']) + '_method_' + params['method']
                             + '_id_' + array_id + '_perror_' + str(params['p_error']) + '.xz')

    # Generate data
    generate(file_path, params, nbr_datapoints=25000, fixed_errors=params['fixed_errors'])

    # View data file
    '''
    iterator = MCMCDataReader(file_path, params['size'])
    data = iterator.full()
    for k in range(int(len(data) / 2)):
        qubit_matrix = data[2 * k].reshape(2, params['size'], params['size'])
        eq_distr = data[2 * k + 1]
        print(qubit_matrix)
        print(eq_distr)
    '''
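
    # Minimal sketch (an assumption, not part of the original pipeline) of
    # reading the file back with pandas alone, relying on the row layout
    # written above, (i, 0) = qubit matrix and (i, 1) = distribution:
    # df = pd.read_pickle(file_path)
    # qubit_matrix = df.loc[(0, 0), 'data']
    # eq_distr = df.loc[(0, 1), 'data']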