-
Notifications
You must be signed in to change notification settings - Fork 0
/
serial.cpp
231 lines (196 loc) · 7.39 KB
/
serial.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
#include "common.h"
#include <cmath>
#include <vector>
#include <set>
#include <unordered_set>
#include <algorithm>
#include <cstdint>
/*
We will produce B bins for N particles.
The naive runtime is O(N^2). When split into bins,
we end up with O(B * (N/B)^2) = O(N^2/B) work.
Thus, we set B = aN to get a total of O(N/a) work.
We want each bin to roughly fill up the cache,
so the general goal is to maximize a by increasing it until
it starts to make things worse.
Ideally, B = aN, but in reality we need a power of 2 bins.
Can try rounding either up or down, but I think rounding down the bin size
makes the most sense. Instead of sqrt(aN) bins on each side, we need
the nearest power of two *above* sqrt(aN).
Even more in reality, bin capacity fitting in cache is probably the sole most
important factor for performance, so that will dictate things. Luckily density
is constant, meaning that size and N increase at the same rate, leading
bin size to directly correspond to a.
*/
// Round n up to the next power of two; a value that already is a power of two
// is returned unchanged. Bit-smearing trick from
// https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
// Note: n == 0 wraps around and yields 0. The shift sequence (1, 2, 4, 8, 16)
// covers a 32-bit unsigned int.
inline int round_up_pow2(const unsigned int n) {
    unsigned int bits = n - 1u;
    // Copy the highest set bit into every lower position.
    for (unsigned int shift = 1u; shift <= 16u; shift <<= 1) {
        bits |= bits >> shift;
    }
    // All-ones below the top bit, plus one, is the next power of two.
    return bits + 1u;
}
using std::max;
using std::min;
/*
Bin capacity is what determines actual performance due to cache limitations...
So I think actually fixing bin capacity is best, calculate others from there.
Number of bins is then N / bin_capacity -> bins per side is the square root of that, rounded up to a power of two.
*/
/*
 * Fixed-capacity storage for a square grid of particle bins.
 *
 * The grid has num_bins_per_side x num_bins_per_side bins and every bin owns
 * bin_capacity consecutive `particle` slots inside one flat array. Bins are
 * ordered in nested square blocks (block_widths[levels] down to
 * block_widths[1], then plain row-major inside the smallest block) so that
 * spatially close bins are also close in memory.
 *
 * Template parameters:
 *   bin_capacity - number of particle slots reserved per bin (must be > 0).
 *   particle     - element type stored in the bins (must be default-constructible).
 *   levels       - how many entries of block_widths (1..levels) are used for blocking.
 *
 * The object owns `bins`; copying is disabled because a member-wise copy would
 * free the same array twice.
 */
template <unsigned int bin_capacity = 400, class particle = particle_t, unsigned int levels = 3>
struct bin_store {
    static_assert(bin_capacity > 0, "bin_capacity must be non-zero");
    static_assert(levels >= 1 && levels <= 3,
                  "block_widths only defines block sizes for levels 1..3");

    const unsigned int N;                  // number of particles the store was sized for
    const unsigned int num_bins_per_side;  // bins along one side (from round_up_pow2)
    const double bin_width;                // size / num_bins_per_side
    const double size;                     // side length passed to the constructor
    const unsigned int num_bins;           // num_bins_per_side squared
    // block_widths[l] is the side length, in bins, of a level-l block.
    // Entry 0 is a single bin; index() reads entries levels..1, so the table
    // must hold levels + 1 values (it holds 4, enough for levels <= 3).
    const int block_widths[4] = { 1, 4, 32, 128 };

    // Actual memory backing for the bins: num_bins * bin_capacity particles.
    // Will change this data type if we rearrange the struct or anything.
    particle *bins;

    // Number of bins per side needed for N particles.
    // The bin count is ceil(N / bin_capacity), clamped to at least 1 so that a
    // small (or zero) N never produces an empty grid and a division by zero in
    // bin_width. The side is ceil(sqrt(count)) rounded up to a power of two.
    static inline unsigned int compute_bins_per_side(const unsigned int N) {
        const unsigned int wanted = N / bin_capacity + (N % bin_capacity != 0 ? 1u : 0u);
        const unsigned int bin_count = std::max(wanted, 1u);
        const unsigned int bps =
            static_cast<unsigned int>(std::ceil(std::sqrt(static_cast<double>(bin_count))));
        return static_cast<unsigned int>(round_up_pow2(bps));
    }

    // Offset (in bins) of the block whose first bin is (r, c) inside an H x W
    // region tiled with block_width-wide blocks. (r, c) must be aligned to
    // block_width. row_wise selects whether blocks are ordered along rows
    // (rows of blocks above, then blocks to the left) or along columns.
    // The min() handles a clipped last row/column of blocks.
    static inline int offset_from_coords(int r, int c, int H, int W, int block_width, bool row_wise) {
        if (row_wise) {
            return r * W + c * std::min(block_width, H - r);
        } else {
            return c * H + r * std::min(block_width, W - c);
        }
    }

    // Flat index into `bins` of slot i of bin (x, y).
    // Preconditions (not checked): x, y < num_bins_per_side and i < bin_capacity.
    inline unsigned int index(const unsigned int x, const unsigned int y, const unsigned int i) const {
        int r = static_cast<int>(x);
        int c = static_cast<int>(y);
        int H = static_cast<int>(num_bins_per_side);
        int W = H;
        int bin_idx = 0;
        // Descend from the coarsest block size to the finest one.
        for (int level = static_cast<int>(levels); level >= 1; --level) {
            const int bw = block_widths[level];
            // Top-left corner of the level-`level` block containing (r, c).
            const int r0 = r - r % bw;
            const int c0 = c - c % bw;
            bin_idx += offset_from_coords(r0, c0, H, W, bw, true);
            // Continue inside that block: shrink the region and make the
            // coordinates relative to the block corner.
            H = std::min(bw, H - r0);
            W = std::min(bw, W - c0);
            r -= r0;
            c -= c0;
        }
        // Plain row-major order inside the innermost block.
        bin_idx += r * W + c;
        return static_cast<unsigned int>(bin_idx) * bin_capacity + i;
    }

    // Sizes the grid for N particles in a square domain of side `size` and
    // allocates (value-initialized) storage for every slot.
    // Initializers are listed in declaration order, which is the order in
    // which they actually run.
    bin_store(const unsigned int N, const double size)
        : N(N),
          num_bins_per_side(compute_bins_per_side(N)),
          bin_width(size / num_bins_per_side),
          size(size),
          num_bins(num_bins_per_side * num_bins_per_side),
          // NOTE(review): the original asked for 64-byte alignment with a
          // non-existent `align(64) new` syntax. Plain new[] only guarantees
          // alignof(particle); switch to an aligned allocator if cache-line
          // alignment turns out to matter.
          bins(new particle[num_bins * bin_capacity]()) {
    }

    // Owning raw pointer: forbid copies instead of double-freeing.
    bin_store(const bin_store&) = delete;
    bin_store& operator=(const bin_store&) = delete;

    ~bin_store() {
        delete[] bins;  // array form must match new[]
    }
    //TODO: implement operator[] for get/set
};
// Number of bins along each side of the square domain. Kept signed so it can
// be mixed with the `int` bin coordinates used below (std::min requires both
// arguments to have the same type, and `num_bins - 1` must not wrap).
int num_bins;
// Side length of one bin, in particle-coordinate units. Every function below
// refers to this real-valued `bin_size`; it replaces the previous, unused
// `unsigned int bin_width` declaration.
double bin_size;
using std::vector;
using std::set;
// bins[bx][by] holds the indices (into the particle array) of the particles
// currently located in bin (bx, by).
vector<vector<set<int>>> bins;
// Placeholder: an empty class template parameterized on an integer bin_exp.
// It has no members and is not referenced by the rest of the visible code.
template <int bin_exp>
class BinStore {};
// Add to `particle`'s acceleration the short-range repulsive force exerted on
// it by `neighbor`. Only `particle` is modified; `neighbor` is read-only here.
void apply_force(particle_t& particle, particle_t& neighbor) {
    // Separation vector pointing from particle to neighbor.
    const double sep_x = neighbor.x - particle.x;
    const double sep_y = neighbor.y - particle.y;
    const double dist_sq = sep_x * sep_x + sep_y * sep_y;

    // Particles farther apart than the cutoff radius do not interact.
    if (!(dist_sq > cutoff * cutoff)) {
        // Clamp tiny separations so the division below stays bounded.
        const double clamped_sq = fmax(dist_sq, min_r * min_r);
        const double dist = sqrt(clamped_sq);

        // Very simple short-range repulsive force.
        const double coef = (1 - cutoff / dist) / clamped_sq / mass;
        particle.ax += coef * sep_x;
        particle.ay += coef * sep_y;
    }
}
// Apply forces between all particles that share bin (bin_x, bin_y).
// For every ordered pair (a, b) of particle indices stored in that bin,
// including a == b, apply_force(parts[a], parts[b]) is called, so each
// particle in the bin accumulates the contribution of every bin member.
//
// parts        - particle array that the indices stored in `bins` refer to.
// bin_x, bin_y - coordinates of the bin; must be valid indices into `bins`.
void apply_force_bin(particle_t* parts, int bin_x, int bin_y) {
    // Bind by reference: copying the set would allocate and duplicate every
    // node on each call, and apply_force never touches `bins`.
    const set<int>& bin = bins[bin_x][bin_y];
    for (const int target : bin) {
        for (const int source : bin) {
            apply_force(parts[target], parts[source]);
        }
    }
}
// Apply to every particle in bin (bin_one_x, bin_one_y) the force exerted by
// every particle in bin (bin_two_x, bin_two_y).
// This is one-directional: only particles of the first bin are passed as the
// first argument of apply_force, which is the argument whose acceleration is
// updated. Callers that need the reverse direction must call again with the
// bins swapped.
//
// parts - particle array that the indices stored in `bins` refer to.
// All four coordinates must be valid indices into `bins`.
void apply_force_between_bins(particle_t* parts, int bin_one_x, int bin_one_y, int bin_two_x, int bin_two_y) {
    // References instead of copies: duplicating two sets per call is pure
    // overhead, and apply_force never modifies `bins`.
    const set<int>& targets = bins[bin_one_x][bin_one_y];
    const set<int>& sources = bins[bin_two_x][bin_two_y];
    for (const int target : targets) {
        for (const int source : sources) {
            apply_force(parts[target], parts[source]);
        }
    }
}
// Advance particle p by one time step dt and keep it inside [0, size] on both
// axes. The velocity is updated from the acceleration first, then the position
// from the new velocity. The two axes are independent, so each is handled
// start to finish in turn.
void move(particle_t& p, double size) {
    // --- x axis ---
    p.vx += p.ax * dt;
    p.x += p.vx * dt;
    // Reflect off the walls until the coordinate is back inside the box.
    while (p.x < 0 || p.x > size) {
        if (p.x < 0) {
            p.x = -p.x;
        } else {
            p.x = 2 * size - p.x;
        }
        p.vx = -p.vx;
    }

    // --- y axis ---
    p.vy += p.ay * dt;
    p.y += p.vy * dt;
    while (p.y < 0 || p.y > size) {
        if (p.y < 0) {
            p.y = -p.y;
        } else {
            p.y = 2 * size - p.y;
        }
        p.vy = -p.vy;
    }
}
// Called once before the simulation starts: chooses the bin size, sizes the
// global bin grid and records every particle's index in the bin that contains
// it. No particle is moved or otherwise modified here.
//
// parts     - particle array (read only).
// num_parts - number of entries in parts.
// size      - side length of the square domain.
void init_simulation(particle_t* parts, int num_parts, double size) {
    // Bins must be at least `cutoff` wide: the force pass only looks at a bin
    // and its direct neighbours, so narrower bins would miss interactions.
    bin_size = max(max(cutoff, min_r), sqrt(cutoff / density));

    // Truncation can give 0 when the domain is smaller than one bin; always
    // keep at least one bin so the grid is never empty.
    num_bins = std::max(1, static_cast<int>(size / bin_size));
    const int last_bin = static_cast<int>(num_bins) - 1;

    bins.assign(num_bins, vector<set<int>>(num_bins));

    for (int i = 0; i < num_parts; i++) {
        // Clamp so a particle sitting exactly on the far wall (or in the
        // leftover strip past the last full bin) lands in the last bin.
        const int x_bin = std::min(last_bin, static_cast<int>(parts[i].x / bin_size));
        const int y_bin = std::min(last_bin, static_cast<int>(parts[i].y / bin_size));
        bins[x_bin][y_bin].insert(i);
    }
}
// Advance the whole system by one time step.
//   1. Reset every particle's acceleration (apply_force only accumulates).
//   2. For each bin, accumulate forces from particles in the same bin and in
//      all adjacent bins (up to 8).
//   3. Move every particle and, if it crossed into another bin, update the
//      bin bookkeeping.
//
// parts     - particle array, updated in place.
// num_parts - number of entries in parts.
// size      - side length of the square domain (used for wall bounces).
void simulate_one_step(particle_t* parts, int num_parts, double size) {
    const int side = static_cast<int>(num_bins);
    const int last_bin = side - 1;
    // Bin coordinate of a position along one axis, clamped to the last bin.
    const auto bin_of = [last_bin](double coord) {
        return std::min(last_bin, static_cast<int>(coord / bin_size));
    };

    // Forces are accumulated with +=, so start every step from zero;
    // otherwise accelerations from earlier steps would pile up.
    for (int p = 0; p < num_parts; ++p) {
        parts[p].ax = 0;
        parts[p].ay = 0;
    }

    // Compute Forces
    for (int i = 0; i < side; i++) {
        for (int j = 0; j < side; j++) {
            apply_force_bin(parts, i, j);
            // apply_force_between_bins only updates particles of the first
            // bin, so every bin has to visit its complete neighbourhood, not
            // just half of it.
            for (int di = -1; di <= 1; di++) {
                const int ni = i + di;
                if (ni < 0 || ni > last_bin) {
                    continue;
                }
                for (int dj = -1; dj <= 1; dj++) {
                    const int nj = j + dj;
                    if ((di == 0 && dj == 0) || nj < 0 || nj > last_bin) {
                        continue;
                    }
                    apply_force_between_bins(parts, i, j, ni, nj);
                }
            }
        }
    }

    // Move Particles
    for (int p = 0; p < num_parts; ++p) {
        const int x_bin_old = bin_of(parts[p].x);
        const int y_bin_old = bin_of(parts[p].y);
        move(parts[p], size);
        const int x_bin_new = bin_of(parts[p].x);
        const int y_bin_new = bin_of(parts[p].y);
        // Only touch the sets when the particle actually changed bins.
        if (x_bin_new != x_bin_old || y_bin_new != y_bin_old) {
            bins[x_bin_old][y_bin_old].erase(p);
            bins[x_bin_new][y_bin_new].insert(p);
        }
    }
}