def seperation(boid: Boid, boids: Boid[]) {
c: Vec3 = 0
for (b : boids) {
if b != boid and distance(b, boid) < seperation_distance
c -= (neighbor.position - boid.position)
}
return c * seperation_scale
}
def alignment(boid: Boid, boids: Boid[]) {
perceived_velocity: Vec3 = 0
neighbors_count = 0
for (b : boids) {
if b != boid and distance(b, boid) < alignment_distance
perceived_velocity += b.velocity - boid.velocity
++neighbors_count
}
if (neighbors_count >= 0) perceived_velocity /= neighbors_count
return perceived_velocity * alignment_scale
}
def cohesion(boid: Boid, boids: Boid[]) {
center_of_mass: Vec3 = 0
neighbors_count = 0
for (b : boids) {
if b != boid and distance(b, boid) < cohesion_distance
center_of_mass += b.position
++neighbors_count
}
if (neighbors_count > 0) {
center_of_mass /= neighbors_count
return (center_of_mass - boid.position) * cohesion_scale
}
return 0
}
def step_naive(pos, vel1, vel2) {
for parallel (boid : boids) {
new_vel = vel1[boid];
for (others : boids) {
// Accumulate for new_vel
}
vel2[boid] = new_vel;
}
update_pos(pos, vel2);
swap(vel1, vel2);
}
def step_naive(pos, vel1, vel2) {
for parallel (boid : boids) {
new_vel = vel1[boid];
for (others : boids) {
// Accumulate for new_vel
}
vel2[boid] = new_vel;
}
update_pos(pos, vel2);
swap(vel1, vel2);
}
Explicit Euler
def step_naive(pos, vel1, vel2) {
for parallel (boid : boids) {
new_vel = vel1[boid];
for (others : boids) {
// Accumulate for new_vel
}
vel2[boid] = new_vel;
}
update_pos(pos, vel2);
swap(vel1, vel2);
}
Buffer Ping-Ponging
Sort:
thrust::sort_by_key(grid_indices,
grid_indices + objects_count,
boid_indices);
Sort:
Sort:
Indirection:
def step_uniform_grid(pos, vel1, vel2) {
indices, grid_indices = compute_indices();
sort_by_key(grid_indices, indices);
grid_first, grid_last = identify_first_last(grid_indices);
for parallel (boid : boids) {
neighbor_grid_cells = calculate_neighbor_grid_cells(boid);
new_vel = vel1[boid];
for (cell : neighbor_grid_cells) {
for (neighbor from grid_first[cell] to grid_last[cell]) {
neighbor_pos = pos[indices[neighbor]];
neighbor_vel = vel[indices[neighbor]];
// Accumulate for new_vel
}
}
vel2[boid] = new_vel;
}
update_pos(pos, vel2);
swap(vel1, vel2);
}
thrust::copy(pos, pos + objects_count, pos_sorted);
thrust::copy(vel, vel + objects_count, vel_sorted);
thrust::sort_by_key(grid_indices,
grid_indices + objects_count,
pos_sorted,
vel_sorted);
thrust::copy(pos, pos + objects_count, pos_sorted);
thrust::copy(vel, vel + objects_count, vel_sorted);
thrust::sort_by_key(grid_indices,
grid_indices + objects_count,
pos_sorted,
vel_sorted);
thrust::sort_by_key(grid_indices,
grid_indices + objects_count,
indices);
thrust::gather(indices, indices + objects_count, pos, pos_sorted);
thrust::gather(indices, indices + objects_count, vel, vel_sorted);
def step_coherent_grid(pos, vel1, vel2) {
indices, grid_indices = compute_indices();
pos_sorted, vel_sorted = pos, vel1
sort_by_key(grid_indices, pos_sorted, vel_sorted);
grid_first, grid_last = identify_first_last(grid_indices);
for parallel (boid : boids) {
neighbor_grid_cells = calculate_neighbor_grid_cells(boid);
new_vel = vel1[boid];
for (cell : neighbor_grid_cells) {
for (neighbor from grid_first[cell] to grid_last[cell]) {
neighbor_pos = pos_sorted[neighbor];
neighbor_vel = vel_sorted[neighbor];
// Accumulate for new_vel
}
}
vel2[boid] = new_vel;
}
update_pos(pos, vel2);
swap(vel1, vel2);
swap(pos, pos_sorted);
}
Slow
Slow
Faster (Read-Only on Device)
Slow
Faster (Read-Only on Device)
Fast
def step_shared_grid(pos, vel1, vel2) {
// same
for parallel (warp: warps) {
while (not process all neighbors) {
// calculate all neighbor grid cells in a wrap
// copy some boids in neighbor cells to shared memory based on capacity
for parallel boids in wrap {
for (neighbor from shared_first to shared_last) {
// Accumulate for new_vel
}
vel2[boid] = new_vel;
}
}
}
// same
}