OptQC_Main.f90
subroutine OptQC_REAL(root,my_rank,p,args_obj)
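! OptQC_REAL: parallel minimization of the quantum gate count obtained from the
! cosine-sine decomposition (CSD) of a real orthogonal matrix.
! Each MPI process runs a stochastic local search over qubit permutations Q and
! basis permutations P, decomposing Q^T P^T X P Q and keeping whichever choice
! yields the fewest gates. The search has three phases: an initial scan over
! random qubit permutations, a main loop of neighbourhood moves on the basis
! permutation with periodic inter-process synchronization (the best-ranked
! processes seed subgroups and broadcast their solutions within them), and
! finally output of the history, permutation, and circuit (.tex) files from the
! process holding the best overall solution.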
use common_module
use csd_mpi
use csd_perm
use csd_tools
use m_mrgrnk
use mpi
use rng
implicit none
character(len=128) :: finput, fhist, fperm, ftex
integer :: N, M, d, i, j, k, l, Msq, num
integer :: ecur, enew, esol, idx_sol
integer, allocatable :: history(:)
integer :: hist_idx, m_pos
! Input arrays
double precision, allocatable :: X(:,:)
! Functions
integer :: FindMinPos, CalcTol, ChooseN
! Workspace arrays
integer, allocatable :: index_level(:), index_pair(:,:,:)
double precision, allocatable :: COEFF(:,:)
integer, allocatable :: Perm(:), Perm_new(:), Perm_sol(:)
integer, allocatable :: QPerm(:), QPerm_new(:)
! MPI variables
integer :: my_rank, p, ierr, root
integer :: MPI_int_buffer(1)
double precision, allocatable :: X_MPI_temp(:)
integer, allocatable :: r_col_array(:)
integer :: mpi_stat(MPI_STATUS_SIZE)
integer :: mpi_group_world, mpi_group_local, mpi_comm_local
! Timer variables
double precision :: c_start, c_mid, c_end, c_time1, c_time2, c_t1, c_t2, c_tot
! Synchronization variables
integer :: SYNCH_ct, POPTNUM
integer :: p_group
integer, allocatable :: p_rank_pos(:), groupsizes(:)
type(l_arr_int_1) :: subgroups
character, pointer :: synch_buffer(:)
! Objects
integer :: nset
integer, allocatable :: type_spec(:)
type(prog_args) :: args_obj
type(csd_generator) :: csdgen_obj
type(csd_solution_set) :: csdss_Xinit, csdss_Xcur, csdss_Xnew, csdss_Xsol
type(csd_write_handle) :: csdwh_obj
type(csd_mpi_handle) :: csdmh_obj
! Initialize the RNG object from the module
call rng_inst%seed(my_rank)
! Use a solution set of 5 matrices (following the convention U = Q^T P^T U' P Q below)
nset = 5
! Get group handle to MPI_COMM_WORLD
call MPI_Comm_group(MPI_COMM_WORLD,mpi_group_world,ierr)
if(my_rank == root) then
write(*,'(a)')"Performing decomposition of a real orthogonal matrix."
write(finput,'(a,a)')args_obj%fbase(1:args_obj%flength),".txt"
write(*,'(a,a)')"Retrieving unitary matrix from file ",finput
open(unit=1,file=finput,action='read')
read(1,*)d
N = ChooseN(d)
M = 2**N
Msq = M*M
allocate(X(M,M))
allocate(X_MPI_temp(Msq))
X = 0.0d0
do i = 1, d
do j = 1, d
read(1,*)X(i,j)
end do
end do
if(M /= d) then
do i = d+1, M
X(i,i) = 1.0d0
end do
end if
close(1)
MPI_int_buffer(1) = N
call Flatten(M,X,X_MPI_temp)
end if
! Broadcast the matrix X from the root process to all other processes.
! The matrix is flattened into a vector for broadcasting and reshaped back into a matrix on the receiving processes.
call MPI_Bcast(MPI_int_buffer,1,MPI_INTEGER,root,MPI_COMM_WORLD,ierr)
if(my_rank /= root) then
N = MPI_int_buffer(1)
M = 2**N
Msq = M*M
allocate(X(M,M))
allocate(X_MPI_temp(Msq))
end if
call MPI_Bcast(X_MPI_temp,Msq,MPI_DOUBLE_PRECISION,root,MPI_COMM_WORLD,ierr)
if(my_rank /= root) then
call Box(M,X_MPI_temp,X)
end if
! Set up modules
call initialize_csd_perm(N,M)
! Start timing here so as to include time spent choosing and decomposing the initial permutation
c_start = MPI_Wtime()
! Allocate record arrays
allocate(history(1+args_obj%PERM_ITER_LIM+args_obj%ITER_LIM))
allocate(r_col_array(p))
! Allocate input and output arrays
allocate(index_level(M-1))
allocate(index_pair(M/2,2,N))
allocate(COEFF(M,M))
index_level = 0
index_pair = 0
COEFF = 0.0d0
! index_level and index_pair are invariant across CSD invocations, so compute them once here
call CYG_INDEXTABLE(N,M,index_level,index_pair)
! COEFF is likewise invariant across CSD invocations
call CYGC_COEFF(N,M,COEFF)
! Allocate workspace arrays
allocate(Perm(M))
allocate(Perm_new(M))
allocate(Perm_sol(M))
allocate(QPerm(N))
allocate(QPerm_new(N))
! Start with the identity permutation
do i = 1, M
Perm(i) = i
Perm_new(i) = i
Perm_sol(i) = i
end do
! Initialize all processes to the identity qubit permutation first - qubit permutation selection is not performed on the root process
do i = 1, N
QPerm(i) = i
end do
! Run object constructors
allocate(type_spec(nset))
do i = 1, nset
! Set all matrices to be of real type
type_spec(i) = 0
end do
call csdgen_obj%constructor(N,M,0,index_level,index_pair,COEFF)
call csdss_Xinit%constructor(N,M,nset,type_spec)
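! Disable the CSD for the outer permutation factors of the initial solution set - only the central matrix U' (arr(3)) is decomposed via the CSD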
csdss_Xinit%arr(1)%toggle_csd = .false.
csdss_Xinit%arr(2)%toggle_csd = .false.
csdss_Xinit%arr(4)%toggle_csd = .false.
csdss_Xinit%arr(5)%toggle_csd = .false.
call csdss_Xcur%constructor(N,M,nset,type_spec)
call csdss_Xnew%constructor(N,M,nset,type_spec)
call csdss_Xsol%constructor(N,M,nset,type_spec)
call csdwh_obj%constructor(N)
call csdmh_obj%constructor(N,M)
! Convention: U = Q^T P^T U' P Q - CHECKED AND VERIFIED
call permlisttomatrixtr(M,Perm,csdss_Xinit%arr(2)%X) ! P^T
call permlisttomatrix(M,Perm,csdss_Xinit%arr(4)%X) ! P
call qperm_process(N,M,csdss_Xinit,csdgen_obj,QPerm,X,ecur)
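! qperm_process applies the qubit permutation QPerm to X (building the factors of the
! convention above), runs the decomposition on the initial solution set, and returns the
! resulting gate count in ecur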
! Initialize the history record
history(1) = ecur
hist_idx = 2
! Repeatedly try random qubit permutations (non-root processes only), accepting any that does not increase the gate count relative to the current best; the search starts from the identity qubit permutation
if(my_rank /= root) then
do i = 1, args_obj%PERM_ITER_LIM
call qperm_generate(N,QPerm_new)
call qperm_process(N,M,csdss_Xinit,csdgen_obj,QPerm_new,X,enew)
if(enew <= ecur) then
QPerm = QPerm_new
ecur = enew
end if
history(hist_idx) = ecur
hist_idx = hist_idx + 1
end do
! Rerun qperm_process to ensure consistency with the optimal QPerm
call qperm_process(N,M,csdss_Xinit,csdgen_obj,QPerm,X,ecur)
else
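! The root process skips the search and keeps the identity qubit permutation; pad its history with the initial gate count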
hist_idx = hist_idx + args_obj%PERM_ITER_LIM
history(2:hist_idx) = history(1)
end if
c_mid = MPI_Wtime()
! Initialize the current, new, and best solutions to the initial setup
call csdss_Xcur%copy(csdss_Xinit)
enew = ecur
call csdss_Xnew%copy(csdss_Xinit)
esol = ecur
call csdss_Xsol%copy(csdss_Xinit)
idx_sol = 0
SYNCH_ct = 0
! Note: may change later - POPTNUM (the number of synchronization subgroups) is currently hardcoded to 10% of the number of processes, rounded up
POPTNUM = ceiling(0.1d0 * p)
! Allocate synchronization variables
allocate(p_rank_pos(p))
allocate(groupsizes(POPTNUM))
call subgroups%constructor(POPTNUM)
! Determine group sizes
i = floor(p/dble(POPTNUM))
j = p - (i * POPTNUM)
groupsizes = i
if(j > 0) groupsizes(1:j) = i+1
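! Example: with p = 23 processes, POPTNUM = ceiling(0.1*23) = 3, so i = floor(23/3) = 7
! and j = 23 - 3*7 = 2, giving group sizes 8, 8, 7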
do i = 1, POPTNUM
call subgroups%l(i)%constructor(groupsizes(i))
end do
c_tot = 0.0d0
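! Main optimization loop: propose a neighbouring basis permutation, decompose the
! corresponding matrix set, and accept the proposal whenever it does not increase the
! gate count; the best solution seen so far is kept separately in csdss_Xsol / Perm_sol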
do i = 1, args_obj%ITER_LIM
Perm_new = Perm
call NeighbourhoodOpt(N,M,csdss_Xinit,csdss_Xcur,csdss_Xnew,Perm_new)
call csdss_Xnew%run_csdr(csdgen_obj)
enew = csdss_Xnew%csdr_ss_ct
!write(*,*)"New number of gates = ",ecur,"/",enew
if(enew <= ecur) then
call csdss_Xcur%copy(csdss_Xnew)
ecur = enew
Perm = Perm_new
if(enew < esol) then
call csdss_Xsol%copy(csdss_Xcur)
esol = ecur
Perm_sol = Perm
idx_sol = i
end if
end if
! Perform synchronization and comparisons after every SYNCH_NUM iterations
SYNCH_ct = SYNCH_ct + 1
if(SYNCH_ct == args_obj%SYNCH_NUM .and. POPTNUM < p) then
c_t1 = MPI_Wtime()
! Wait for all processes to reach this point - a possible slowdown while fast processes wait on slow ones
call MPI_Barrier(MPI_COMM_WORLD,ierr)
! Collate all current objective function values
MPI_int_buffer(1) = ecur
call MPI_Allgather(MPI_int_buffer,1,MPI_INTEGER,r_col_array,1,MPI_INTEGER,MPI_COMM_WORLD,ierr)
! Obtain ranking for each process
p_rank_pos = -1
call mrgrnk(r_col_array,p_rank_pos)
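! mrgrnk ranks the gathered gate counts: p_rank_pos(j) is the (1-based) index of the
! process with the j-th smallest gate count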
! Construct the groupings: each of the POPTNUM best-ranked processes becomes the root (local rank 0)
! of one subgroup, and the remaining processes are distributed among the subgroups in rank order
! (index i is already used by the outer loop, so j, k and l are used here)
l = POPTNUM+1
do j = 1, POPTNUM
num = p_rank_pos(j)
if(my_rank+1 == num) p_group = j
subgroups%l(j)%arr(1) = num-1
do k = 2, groupsizes(j)
num = p_rank_pos(l)
if(my_rank+1 == num) p_group = j
subgroups%l(j)%arr(k) = num-1
l = l + 1
end do
end do
! Get handle to the new group
call MPI_Group_incl(mpi_group_world,groupsizes(p_group),subgroups%l(p_group)%arr,mpi_group_local,ierr)
! Create communicator for the new group
call MPI_Comm_create(MPI_COMM_WORLD,mpi_group_local,mpi_comm_local,ierr)
! Broadcast the best solution from each subgroup's root process to the rest of that subgroup - the group root (the process with the best solution in the group) is always local rank 0
call csdmh_obj%synchronize(QPerm,Perm,csdss_Xcur,mpi_group_local,mpi_comm_local,0)
! Deallocate the new group and corresponding communicator
call MPI_Group_free(mpi_group_local,ierr)
call MPI_Comm_free(mpi_comm_local,ierr)
! Copy over current values
ecur = csdss_Xcur%csdr_ss_ct
if(ecur < esol) then
call csdss_Xsol%copy(csdss_Xcur)
esol = ecur
Perm_sol = Perm
idx_sol = args_obj%ITER_LIM
end if
! Reset synchronization count
SYNCH_ct = 0
c_t2 = MPI_Wtime()
c_tot = c_tot + c_t2 - c_t1
end if
! Update history
history(hist_idx) = ecur
hist_idx = hist_idx + 1
end do
if(ecur < esol) then
call csdss_Xsol%copy(csdss_Xcur)
esol = ecur
Perm_sol = Perm
idx_sol = args_obj%ITER_LIM
end if
! End timing here!
c_end = MPI_Wtime()
c_time1 = c_mid - c_start
c_time2 = c_end - c_mid
! Write the .tex file based on the best solution
MPI_int_buffer(1) = esol
call MPI_Allgather(MPI_int_buffer,1,MPI_INTEGER,r_col_array,1,MPI_INTEGER,MPI_COMM_WORLD,ierr)
m_pos = FindMinPos(p,r_col_array)
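! m_pos is the 1-based index of the process with the smallest solution gate count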
! Perform I/O operations from the process with the fewest gates - this eliminates the need
! to communicate results back to the root.
if(my_rank == m_pos-1) then
write(*,'(a,i4,a,i8,a)')"Process ",my_rank," has the least number of gates: ",esol," gates."
write(*,'(a)')"Writing solution to files."
write(*,*)
! Initialize filenames
write(fhist,'(a,a)')args_obj%fbase(1:args_obj%flength),"_history.dat"
write(fperm,'(a,a)')args_obj%fbase(1:args_obj%flength),"_perm.dat"
write(ftex,'(a,a)')args_obj%fbase(1:args_obj%flength),"_circuit.tex"
! Assign ftex to the csd_write_handle object
call csdwh_obj%set_file(ftex)
! Write the history of the algorithm to a file
open(unit=1,file=fhist,action='write')
write(1,'(i8,a,i8)')args_obj%PERM_ITER_LIM," ",args_obj%ITER_LIM
do i = 0, args_obj%PERM_ITER_LIM + args_obj%ITER_LIM
write(1,'(i8,a,i8)')i," ",history(i+1)
end do
close(1)
! Write the optimal qubit permutation list (QPerm) and the optimal permutation list (Perm_sol)
open(unit=2,file=fperm,action='write')
do i = 1, N
write(2,'(i15,a)',advance='no')QPerm(i)," "
end do
write(2,*)
do i = 1, M
write(2,'(i15,a)',advance='no')Perm_sol(i)," "
end do
write(2,*)
close(2)
! Write the .tex file
call csdss_Xsol%write_circuit(csdwh_obj)
end if
call flush(6)
call MPI_Barrier(MPI_COMM_WORLD,ierr)
MPI_int_buffer(1) = 0
! Serialize the per-process summary output: each rank waits for a token from the previous rank before printing, then passes it on (assumes root = 0, so rank 0 prints first)
if(my_rank /= root) then
call MPI_Recv(MPI_int_buffer,1,MPI_INTEGER,my_rank-1,101,MPI_COMM_WORLD,mpi_stat,ierr)
end if
write(*,'(a,i4,a,i8,a,i8)')"Process ",my_rank,": Initial number of gates before/after reduction = ",csdss_Xinit%csd_ss_ct,"/",csdss_Xinit%csdr_ss_ct
write(*,'(a,i4,a,i8,a,i8,a,i8,a,i8,a,i8,a,i8,a,i8)')"Process ",my_rank,": Breakdown of solution = ",csdss_Xsol%arr(1)%csdr_ct,"/",csdss_Xsol%arr(2)%csdr_ct,"/",csdss_Xsol%arr(3)%csdr_ct,"/",csdss_Xsol%arr(4)%csdr_ct,"/",csdss_Xsol%arr(5)%csdr_ct,"/",csdss_Xsol%csdr_ss_ct," at step number ",idx_sol
write(*,'(a,i4,a,f15.9,a,f15.9,a,f15.9,a)')"Process ",my_rank,": Time taken = ",c_time1,"/",c_time2,"/",c_tot," seconds."
call flush(6)
if(my_rank /= p-1) then
call MPI_Send(MPI_int_buffer,1,MPI_INTEGER,my_rank+1,101,MPI_COMM_WORLD,ierr)
end if
! Finalize modules
call finalize_csd_perm()
! Deallocate group handles
call MPI_Group_free(mpi_group_world,ierr)
! Free up memory
deallocate(X)
deallocate(X_MPI_temp)
deallocate(history)
deallocate(r_col_array)
deallocate(index_level)
deallocate(index_pair)
deallocate(COEFF)
deallocate(Perm)
deallocate(Perm_new)
deallocate(Perm_sol)
deallocate(QPerm)
deallocate(QPerm_new)
deallocate(type_spec)
deallocate(p_rank_pos)
deallocate(groupsizes)
! Run object destructors
call subgroups%destructor()
call csdgen_obj%destructor()
call csdss_Xinit%destructor()
call csdss_Xcur%destructor()
call csdss_Xnew%destructor()
call csdss_Xsol%destructor()
call csdwh_obj%destructor()
call csdmh_obj%destructor()
end subroutine OptQC_REAL
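! Box: reshape a flattened length-M*M vector back into an M x M matrix, copying
! elements row by row - the inverse of Flatten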
subroutine Box(M,source,dest)
implicit none
integer :: M
double precision :: source(M*M), dest(M,M)
integer :: i, j, ct
ct = 1
do i = 1, M
do j = 1, M
dest(i,j) = source(ct)
ct = ct + 1
end do
end do
return
end subroutine Box
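! Flatten: copy an M x M matrix into a length-M*M vector, row by row (the inverse
! of Box); used to broadcast the matrix over MPI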
subroutine Flatten(M,source,dest)
implicit none
integer :: M
double precision :: source(M,M), dest(M*M)
integer :: i, j, ct
ct = 1
do i = 1, M
do j = 1, M
dest(ct) = source(i,j)
ct = ct + 1
end do
end do
return
end subroutine Flatten
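! A minimal sketch of how OptQC_REAL is typically driven from a main program.
! The actual driver lives elsewhere in the repository; the argument-filling step
! below is an assumption, not this project's API (prog_args is assumed to come
! from common_module, with the fields used in OptQC_REAL above):
!
!   program OptQC_driver
!   use common_module
!   use mpi
!   implicit none
!   integer :: my_rank, p, ierr
!   type(prog_args) :: args_obj
!   call MPI_Init(ierr)
!   call MPI_Comm_rank(MPI_COMM_WORLD,my_rank,ierr)
!   call MPI_Comm_size(MPI_COMM_WORLD,p,ierr)
!   ! ... fill args_obj (fbase/flength, PERM_ITER_LIM, ITER_LIM, SYNCH_NUM) ...
!   call OptQC_REAL(0,my_rank,p,args_obj)
!   call MPI_Finalize(ierr)
!   end program OptQC_driver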