Parallel_Merge.f90
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
module Parallel_Merge
use, intrinsic :: iso_fortran_env, only: sp => real32, dp => real64
use :: MPI
implicit none
private
public :: Parallel_Merge_Sort
contains
subroutine Parallel_Merge_Sort(local_array, collated_array, root, block_lens, disps, MPI_communicator)
! Parallel_Merge_Sort takes a real-valued array distributed across an MPI communicator. The array segments are sorted
! locally using a merge sort and are gathered by the root process. The root process then merges the sorted segments into
! the final sorted array. A sketch of typical usage is given in the example driver after the module.
real(dp), intent(inout), dimension(:) :: local_array ! array to be sorted
real(dp), intent(inout), dimension(:) :: collated_array ! total sorted array to be formed at the root node
integer, intent(in) :: root ! node at which to form the final sorted array
integer, intent(in), dimension(:) :: block_lens ! the size of the array segment at each MPI node, ordered by ascending rank
integer, intent(in), dimension(:) :: disps ! displacement of each array section from the global array start (0-indexed)
integer, intent(in) :: MPI_communicator
! MPI environment
integer :: flock ! the number of MPI nodes
integer :: rank
integer :: ierr
call MPI_comm_size(MPI_communicator, flock, ierr)
call MPI_comm_rank(MPI_communicator, rank, ierr)
call Merge_Sort(local_array, 1, size(local_array))
call MPI_gatherv(local_array, size(local_array), MPI_double_precision, collated_array, block_lens, disps, &
& MPI_double_precision, root, MPI_communicator, ierr)
if (rank == root) then
call Merge_Ordered_Lists(collated_array, flock)
endif
end subroutine Parallel_Merge_Sort
subroutine Insertion_Sort(array)
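! Insertion_Sort sorts 'array' in place in ascending order. Merge_Sort hands it the small segments, where the low constant
! overhead of insertion sort beats further recursive partitioning.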
real(dp), intent(inout), dimension(:) :: array
real(dp) :: temp
integer :: i, j
do i = 2, size(array)
temp = array(i)
j = i - 1
do while (j >= 1)
if (array(j) <= temp) exit
array(j + 1) = array(j)
j = j - 1
enddo
array(j + 1) = temp
enddo
end subroutine Insertion_Sort
subroutine Merge(array, start, mid, finish)
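! Merge combines the two adjacent sorted runs array(start:mid) and array(mid+1:finish) into one sorted run in place, using
! a temporary buffer of the combined length.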
real(dp), intent(inout), dimension(:) :: array
integer, intent(in) :: start, mid, finish
real(dp), dimension(:), allocatable :: temp
integer :: i, j, k
allocate(temp(finish - start + 1))
i = start
j = mid + 1
k = 1
do while (i <= mid .and. j <= finish)
if (array(i) <= array(j)) then
temp(k) = array(i)
k = k + 1
i = i + 1
else
temp(k) = array(j)
k = k + 1
j = j + 1
endif
enddo
do while (i <= mid)
temp(k) = array(i)
k = k + 1
i = i + 1
enddo
do while (j <= finish)
temp(k) = array(j)
k = k + 1
j = j + 1
enddo
do i = start, finish
array(i) = temp(i - start + 1)
enddo
end subroutine Merge
recursive subroutine Merge_Sort(array, start, finish)
! Merge_Sort performs a recursive merge sort of the local input 'array'. Segments of 512 elements or fewer are sorted by an
! insertion sort so as to prevent over-partitioning and limit OpenMP task creation. The tasks only run concurrently when the
! call is made inside an OpenMP parallel region; otherwise they execute serially, which is still correct.
real(dp), intent(inout), dimension(:) :: array
integer, intent(in) :: start, finish ! specifies the region of the array to be sorted.
integer :: mid
if (start < finish) then
if (finish - start >= 512) then
mid = (start + finish) / 2
!$omp taskgroup
!$omp task shared(array) untied
call Merge_Sort(array, start, mid)
!$omp end task
!$omp task shared(array) untied
call Merge_Sort(array, mid + 1, finish)
!$omp end task
!$omp end taskgroup
call Merge(array, start, mid, finish)
else
call Insertion_Sort(array(start:finish))
endif
endif
end subroutine Merge_Sort
subroutine Merge_Ordered_Lists(array, flock)
! Merge_Ordered_Lists constructs a table 'Bounds' which tracks the lower and upper bounds of each sorted array fragment.
! This table is used to guide pairwise merges of the collated array partitions, combining fragments in roughly ascending
! size until a single sorted run remains.
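! A small worked example (illustrative sizes, not from the original): with flock = 4 and size(array) = 16, Bounds starts as
! (1,4), (5,8), (9,12), (13,16). The first sweep spawns tasks merging (1:4) with (5:8) and (9:12) with (13:16), leaving the
! rows (1,8), 0, (9,16), 0. After the taskwait, the next sweep merges (1:8) with (9:16) into (1,16), and the final sweep
! finds a single fragment spanning the whole array and exits.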
real(dp), intent(inout), dimension(:) :: array
integer, intent(in) :: flock
integer :: partition, lower, middle, upper
integer, dimension(:,:), allocatable :: Bounds
integer :: i, indx_1, indx_2
if (flock == 1) return
partition = size(array)/flock
allocate(Bounds(flock, 2))
do i = 1, flock
Bounds(i, :) = [(i - 1)*partition + 1, i*partition]
enddo
Bounds(flock, 2) = size(array)
i = 1
do
lower = 0
middle = 0
upper = 0
do while ((lower == 0) .and. (i < flock))
if (Bounds(i, 1) /= 0) then
lower = Bounds(i, 1)
middle = Bounds(i, 2)
indx_1 = i
i = i + 1
exit
else
i = i + 1
endif
enddo
if ((lower == lbound(array, 1)) .and. (middle == ubound(array, 1))) exit
do while ((upper == 0) .and. (i <= flock))
if (Bounds(i, 2) /= 0) then
upper = Bounds(i, 2)
indx_2 = i
i = i + 1
exit
else
i = i + 1
endif
enddo
if ((lower /= 0) .and. (upper /= 0)) then
!$omp task shared(array, Bounds) firstprivate(upper, middle, lower, indx_1, indx_2) untied
call Merge(array, lower, middle, upper)
Bounds(indx_1, :) = [lower, upper]
Bounds(indx_2, :) = 0
!$omp end task
else
!$omp taskwait
i = 1
endif
enddo
end subroutine Merge_Ordered_Lists
end module Parallel_Merge
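! Example driver: a minimal sketch of how the module might be used; it is not part of the original file. The program name,
! the global array length, and the equal partitioning across ranks are illustrative assumptions. The OpenMP tasks inside
! Merge_Sort execute serially here because no enclosing parallel region is created; wrapping the sort in an
! !$omp parallel / !$omp single region (with MPI initialised via MPI_init_thread) would let them run concurrently.
program Demo_Parallel_Merge
use, intrinsic :: iso_fortran_env, only: dp => real64
use :: MPI
use :: Parallel_Merge
implicit none
integer, parameter :: n_global = 1024 ! illustrative total length, assumed divisible by the number of ranks
real(dp), dimension(:), allocatable :: local_array, collated_array
integer, dimension(:), allocatable :: block_lens, disps
integer :: flock, rank, ierr, i, n_local
call MPI_init(ierr)
call MPI_comm_size(MPI_comm_world, flock, ierr)
call MPI_comm_rank(MPI_comm_world, rank, ierr)
n_local = n_global / flock
allocate(local_array(n_local), collated_array(n_global))
allocate(block_lens(flock), disps(flock))
block_lens = n_local
disps = [((i - 1)*n_local, i = 1, flock)] ! 0-indexed displacements into the global array
call random_number(local_array) ! unsorted local data
call Parallel_Merge_Sort(local_array, collated_array, 0, block_lens, disps, MPI_comm_world)
if (rank == 0) print *, 'sorted: ', all(collated_array(2:) >= collated_array(:n_global - 1))
call MPI_finalize(ierr)
end program Demo_Parallel_Merge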