/*
 * National University of Computer & Emerging Sciences - Lahore
 * CS5009 - Advanced Operating Systems
 * Assignment 1
 *
 * Question-1
 * Write the program that works by testing each odd number (up to a specified
 * limit) for divisibility by all the factors from 3 to the square root of
 * that number. Your task is to parallelize this algorithm using OpenMP.
 * The program takes two main parameters, which are read in from the command
 * line, P: the number of processors (numProcs in the code); and N: the
 * problem size ... (cont.)
 *
 * CODE
 */
#include <cerrno>
#include <climits>
#include <cmath>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <omp.h>

using namespace std;

/*
 * Parses `text` as a base-10 integer.
 *
 * Returns true and stores the result in `value` only when the whole string
 * is a number that fits in an int; otherwise returns false and leaves
 * `value` untouched.
 */
static bool parseInt(const char *text, int &value)
{
    errno = 0;
    char *end = nullptr;
    const long parsed = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE ||
        parsed < INT_MIN || parsed > INT_MAX) {
        return false;
    }
    value = static_cast<int>(parsed);
    return true;
}

/*
 * Trial division for an odd number n >= 3: tries every odd factor from 3 up
 * to the square root of n. The bound is written as `f <= n / f` so that no
 * floating-point sqrt is needed and f * f can never overflow.
 */
static bool isOddPrime(int n)
{
    for (int f = 3; f <= n / f; f += 2) {
        if (n % f == 0) {
            return false;
        }
    }
    return true;
}

/*
 * Usage: ./program P N [-o outputFile]
 *
 *   P  number of OpenMP threads to use (>= 1)
 *   N  upper limit of the search, inclusive (>= 3)
 *   -o write the primes to outputFile instead of standard output
 *
 * Prints every prime <= N, one per line, followed by the elapsed time of
 * the parallel section. Returns 0 on success and 1 on any argument or
 * file error.
 */
int main(int argc, char *argv[])
{
    if (argc < 3) {
        cerr << "Usage: ./program P N [-o outputFile]" << endl;
        return 1;
    }

    int numProcs = 0;
    if (!parseInt(argv[1], numProcs) || numProcs < 1) {
        cerr << "Error: Invalid number of processors." << endl;
        return 1;
    }

    int size = 0;
    if (!parseInt(argv[2], size) || size < 3) {
        cerr << "Error: Invalid problem size." << endl;
        return 1;
    }

    bool outputToFile = false;
    string outputFile;
    if (argc > 3 && string(argv[3]) == "-o") {
        // "-o" must be followed by a file name; argv[4] does not exist otherwise.
        if (argc < 5) {
            cerr << "Usage: ./program P N [-o outputFile]" << endl;
            return 1;
        }
        outputToFile = true;
        outputFile = argv[4];
    }

    // Open the destination before doing any work so a bad path fails fast.
    ofstream file;
    if (outputToFile) {
        file.open(outputFile);
        if (!file) {
            cerr << "Error: Cannot open output file." << endl;
            return 1;
        }
    }

    // Slot idx stands for the odd number 2*idx + 3, so the slots cover
    // exactly the odd numbers in [3, size].
    // vector<char> rather than vector<bool>: vector<bool> packs several
    // flags into one word, so threads writing neighbouring flags would race.
    const int oddCount = (size - 1) / 2;
    vector<char> isPrime(oddCount, 0);

    omp_set_num_threads(numProcs);
    const double start_time = omp_get_wtime();

    // Every iteration reads nothing shared and writes only its own slot, so
    // the loop needs no synchronisation. Guided scheduling balances the load
    // because larger candidates take longer to test.
    #pragma omp parallel for schedule(guided)
    for (int idx = 0; idx < oddCount; idx++) {
        isPrime[idx] = isOddPrime(2 * idx + 3) ? 1 : 0;
    }

    const double end_time = omp_get_wtime();

    ostream &out = outputToFile ? static_cast<ostream &>(file) : cout;
    out << 2 << '\n';  // the only even prime; size >= 3 guarantees it is in range
    for (int idx = 0; idx < oddCount; idx++) {
        if (isPrime[idx]) {
            out << 2 * idx + 3 << '\n';
        }
    }
    if (outputToFile) {
        file.close();
    }

    cout << "Time elapsed: " << end_time - start_time << " seconds" << endl;
    return 0;
}

/*
 * ScreenShot
 *
 * Question-2
 * Parallelize the following piece of code using OpenMP with SIZE=1000.
 */
Report sequential execution time, parallel execution time (when using 2 threads, 3 threads and 4 threads), and the achieved speedup (when used 2 threads, 3 threads, and 4 threads) cont….. CODE #include <omp.h> #include <stdio.h> #define SIZE 1000 int main() { int key = 111, x, y, z; long win = 0; int i, j, k; double start_time, end_time; start_time = omp_get_wtime(); #pragma omp parallel for private(x,y,z) shared(key) reduction(+:win) num_threads(4) for (i = 0; i < SIZE; i++) { for (j = 0; j < SIZE; j++) { for (k = 0; k < SIZE; k++) { x = (i * i * 1000 / 35) % 1000; y = (j * j * 1000 / 36) % 1000; z = (k * k * 1000 / 37) % 1000; if (key == (x + y + z)) { win += 1; } } } } end_time = omp_get_wtime(); printf("total wins=%ld\n", win); printf("Time taken = %lf seconds\n", end_time - start_time); return 0; } Output Question-3 Implement a simple parallel linear search algorithm using OpenMP. The program works on an integer array of size ‘N’ filled with random numbers and a key-value to be searched. Write the parallel program to divide the search space among the multiple threads. Your program should output the total number of times the occurrence of the key found in the array.cont…. Code #include <stdio.h> #include <stdlib.h> #include <omp.h> #define N 1000000 int main() { int a[N], key, count = 0; // Fill the array with random numbers for (int i = 0; i < N; i++) { a[i] = rand(); } // Generate a random key key = rand(); #pragma omp parallel for reduction(+:count) for (int i = 0; i < N; i++) { if (a[i] == key) { count++; } } printf("Total occurrences of key %d in the array: %d\n", key, count); return 0; } sOutPut ScreenShot Question No. 4: In this question you have to do experimentation on Matrix multiplication using OpenMP. The size of the N x N matrices in the program should be 100 x 100. Your task is to parallelize this matrix multiplication program in different ways (course grain, fine grain) and report the execution time for each program. 
/*
 * You can call the function omp_get_wtime() at the beginning and end of the
 * program to find the elapsed time. The function omp_get_wtime() returns a
 * double value ... (cont.)
 *
 * Code
 */
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>

#define N 100

/*
 * Coarse-grained C = A * B for row-major N x N matrices: the unit of
 * parallel work is one whole row of C.
 */
static void multiply_coarse(const double *A, const double *B, double *C)
{
    #pragma omp parallel for default(none) shared(A, B, C)
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            double sum = 0.0;
            for (int k = 0; k < N; k++) {
                sum += A[i * N + k] * B[k * N + j];
            }
            C[i * N + j] = sum;
        }
    }
}

/*
 * Fine-grained C = A * B for row-major N x N matrices: the unit of parallel
 * work is a single element of C, distributed with the schedule `kind`.
 *
 * The N * N cells are enumerated by one flat index so that the work-sharing
 * loop, not the thread id, decides which cell a thread computes; the result
 * is correct for any team size the runtime provides. Each cell's full dot
 * product is computed by one thread, so no two threads write the same
 * element.
 */
static void multiply_fine(const double *A, const double *B, double *C,
                          omp_sched_t kind)
{
    /* A chunk size below 1 asks for the default chunk of the given kind. */
    omp_set_schedule(kind, 0);

    #pragma omp parallel for default(none) shared(A, B, C) schedule(runtime)
    for (int cell = 0; cell < N * N; cell++) {
        const int i = cell / N;
        const int j = cell % N;
        double sum = 0.0;
        for (int k = 0; k < N; k++) {
            sum += A[i * N + k] * B[k * N + j];
        }
        C[cell] = sum;
    }
}

/*
 * Question 4 program: multiplies two N x N matrices three times (coarse
 * grained, fine grained with static scheduling, fine grained with dynamic
 * scheduling) and prints the wall-clock time of each run. Every run
 * overwrites C completely, so the runs do not influence one another.
 * Returns 0 on success, 1 if memory cannot be allocated.
 */
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    /* Allocate memory for matrices A, B, and C */
    double *A = malloc(N * N * sizeof *A);
    double *B = malloc(N * N * sizeof *B);
    double *C = malloc(N * N * sizeof *C);
    if (A == NULL || B == NULL || C == NULL) {
        fprintf(stderr, "Error: could not allocate the matrices\n");
        free(A);
        free(B);
        free(C);
        return 1;
    }

    /* Initialize matrices A and B */
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            A[i * N + j] = i + j;
            B[i * N + j] = i - j;
            C[i * N + j] = 0.0;
        }
    }

    double start_time;
    double end_time;

    /* Coarse-grained parallelization */
    start_time = omp_get_wtime();
    multiply_coarse(A, B, C);
    end_time = omp_get_wtime();
    printf("Course-grained parallelization execution time: %f seconds\n", end_time - start_time);

    /* Fine-grained parallelization with static scheduling */
    start_time = omp_get_wtime();
    multiply_fine(A, B, C, omp_sched_static);
    end_time = omp_get_wtime();
    printf("Fine-grained parallelization execution time with static scheduling: %f seconds\n", end_time - start_time);

    /* Fine-grained parallelization with dynamic scheduling */
    start_time = omp_get_wtime();
    multiply_fine(A, B, C, omp_sched_dynamic);
    end_time = omp_get_wtime();
    printf("Fine-grained parallelization execution time with dynamic scheduling: %f seconds\n", end_time - start_time);

    /* Free memory */
    free(A);
    free(B);
    free(C);
    return 0;
}