Question: Add an algorithm to this kernel code that finds the maximum value in the vector it will reduce. __global__ void reduction(float *out, float *in, unsigned size) { ... }

Add an algorithm to this kernel code that finds the maximum value in the vector it will reduce.

/*
 * Block-level sum reduction.
 *
 * Each thread block sums a segment of 2 * BLOCK_SIZE consecutive floats of
 * `in`, starting at element 2 * blockIdx.x * blockDim.x, and writes that
 * partial sum to out[blockIdx.x]. Elements at or beyond `size` contribute
 * 0.0f, so `size` does not have to be a multiple of the segment length.
 *
 * Parameters:
 *   out  - receives one partial sum per block (indexed by blockIdx.x).
 *   in   - input vector of `size` floats.
 *   size - number of valid elements in `in`.
 *
 * Preconditions (not checked here):
 *   - 1D launch with blockDim.x == BLOCK_SIZE; the shared array is sized
 *     and indexed with BLOCK_SIZE while the segment base uses blockDim.x.
 *   - BLOCK_SIZE is a power of two; the stride loops halve/double the
 *     stride and would skip elements otherwise.
 *
 * Shared memory: 2 * BLOCK_SIZE floats when SIMPLE is defined,
 * BLOCK_SIZE floats otherwise (both statically allocated).
 *
 * Note: a maximum reduction has the same structure; combine with fmaxf()
 * instead of `+` and pad with -INFINITY instead of 0.0f. It is not added
 * here because this kernel's single output per block is the sum.
 */
__global__ void reduction(float *out, float *in, unsigned size) {

    // 64-bit index: the 32-bit product 2 * blockIdx.x * blockDim.x can
    // exceed INT_MAX for large inputs, and idx + BLOCK_SIZE must not wrap.
    const size_t idx = 2 * (size_t)blockIdx.x * blockDim.x + threadIdx.x;

#ifdef SIMPLE
    // Naive variant: stage the whole segment, then combine neighbours with
    // a stride that doubles each step (interleaved addressing).
    __shared__ float in_s[2 * BLOCK_SIZE];

    in_s[threadIdx.x] = (idx < size) ? in[idx] : 0.0f;
    in_s[threadIdx.x + BLOCK_SIZE] =
        (idx + BLOCK_SIZE < size) ? in[idx + BLOCK_SIZE] : 0.0f;

    for (int stride = 1; stride < (BLOCK_SIZE << 1); stride <<= 1) {
        // Barrier is outside the divergent branch so every thread reaches
        // it; it also orders the initial loads before the first step.
        __syncthreads();
        if (threadIdx.x % stride == 0)
            in_s[2 * threadIdx.x] += in_s[2 * threadIdx.x + stride];
    }
#else
    // Optimised variant: each thread adds its two elements while loading,
    // halving the shared-memory footprint, then the active threads stay
    // contiguous as the stride is halved each step.
    __shared__ float in_s[BLOCK_SIZE];

    in_s[threadIdx.x] =
        ((idx < size) ? in[idx] : 0.0f) +
        ((idx + BLOCK_SIZE < size) ? in[idx + BLOCK_SIZE] : 0.0f);

    for (int stride = BLOCK_SIZE >> 1; stride > 0; stride >>= 1) {
        __syncthreads();
        if (threadIdx.x < stride)
            in_s[threadIdx.x] += in_s[threadIdx.x + stride];
    }
#endif

    // Thread 0 performed the final accumulation into in_s[0] itself, so no
    // extra barrier is needed before it publishes the block's result.
    if (threadIdx.x == 0)
        out[blockIdx.x] = in_s[0];
}

Step by Step Solution

There are 3 steps involved in the solution.

1 Expert Approved Answer
Step: 1 Unlock blur-text-image
Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock
Step: 3 Unlock

Students Have Also Explored These Related Databases Questions!