Analyze the code solution below and discuss any gain in performance. To gather measurement values, use any CUDA or OpenMP library API that measures performance (time, speedup, etc.), or use what the compiler (C, C++) offers. #include <stdlib.h> //malloc and free #include <stdio.h> //printf #include <omp.h> //OpenMP // Very small values for this simple illustrative example #define ARRAY_SIZE 8 //Size of arrays whose elements will be added together. #define NUM_THREADS 4 //Number of threads to use for vector addition. int main (int argc, char *argv[]) { // elements of arrays a and b will be added // and placed in array c int * a; int * b; int * c; int n = ARRAY_SIZE; // number of array elements int n_per_thread; // elements per thread int total_threads = NUM_THREADS; // number of threads to use int i; // loop index // allocate space for the arrays a = (int *) malloc(sizeof(int)*n); b = (int *) malloc(sizeof(int)*n); c = (int *) malloc(sizeof(int)*n); // initialize arrays a and b with consecutive integer values // as a simple example for(i=0; i…

Microsoft Visual C#
7th Edition
ISBN:9781337102100
Author: Joyce Farrell
Publisher: Cengage Learning
Chapter8: Advanced Method Concepts
Section: Chapter Questions
Problem 7RQ
icon
Related questions
Topic Video
Question

Analyze the code solution below and discuss any gain in performance. To gather measurement values, use any CUDA or OpenMP library API that measures performance (time, speedup, etc.), or use what the compiler (C, C++) offers.

#include <stdlib.h> //malloc and free
#include <stdio.h> //printf
#include <omp.h> //OpenMP

// Very small values for this simple illustrative example
#define ARRAY_SIZE 8 //Size of arrays whose elements will be added together.
#define NUM_THREADS 4 //Number of threads to use for vector addition.

/*

int main (int argc, char *argv[])
{
// elements of arrays a and b will be added
// and placed in array c
int * a;
int * b;
int * c;
intn = ARRAY_SIZE; // number of array elements
intn_per_thread; // elements per thread
inttotal_threads = NUM_THREADS; // number of threads to use
inti; // loop index
// allocate space for the arrays
a = (int *) malloc(sizeof(int)*n);
b = (int *) malloc(sizeof(int)*n);
c = (int *) malloc(sizeof(int)*n);

// initialize arrays a and b with consecutive integer values

// as a simple example

for(i=0; i<n; i++)
{
a[i] = i;
}

for(i=0; i<n; i++)
{
b[i] = i;
}

// Additional work to set the number of threads.
// We hard-code to 4 for illustration purposes only.
omp_set_num_threads(total_threads);

// determine how many elements each process will work on
n_per_thread = n/total_threads;

// Compute the vector addition
// Here is where the 4 threads are specifically 'forked' to
// execute in parallel. This is directed by the pragma and
// thread forking is compiled into the resulting executable.
// Here we use a 'static schedule' so each thread works on
// a 2-element chunk of the original 8-element arrays.

#pragmaompparallelforshared(a, b, c) private(i) schedule(static, n_per_thread)

for(i=0; i<n; i++)
{
c[i] = a[i]+b[i];

// Which thread am I? Show who works on what for this small example
printf("Thread %d works on element%d\n", omp_get_thread_num(), i);
}

// Check for correctness (only plausible for small vector size)
// A test we would eventually leave out
printf("i\ta[i]\t+\tb[i]\t=\tc[i]\n");
for(i=0; i<n; i++)
{
printf("%d\t%d\t\t%d\t\t%d\n", i, a[i], b[i], c[i]);
}

// clean up memory
free(a);
free(b);
free(c);

return0;
}

 

Expert Solution
steps

Step by step

Solved in 3 steps with 2 images

Blurred answer
Knowledge Booster
Instruction Format
Learn more about
Need a deep-dive on the concept behind this application? Look no further. Learn more about this topic, computer-science and related others by exploring similar questions and additional content below.
Similar questions
  • SEE MORE QUESTIONS
Recommended textbooks for you
Microsoft Visual C#
Microsoft Visual C#
Computer Science
ISBN:
9781337102100
Author:
Joyce, Farrell.
Publisher:
Cengage Learning,