Uploaded by sarwat abbas

report print

advertisement
National University
of Computer & Emerging Sciences - Lahore
CS5009- Advanced Operating Systems
Assignment 1
Question-1
Write a program that works by testing each odd number (up to a specified limit) for
divisibility by all the factors from 3 to the square root of that number. Your task is to
parallelize this algorithm using OpenMP. The program takes two main parameters,
which are read in from the command line, P: the number of processors (numProcs in the
code); and N: the problem size…cont.….
CODE
#include <cerrno>
#include <climits>
#include <cmath>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <omp.h>
using namespace std;
// Parses `text` as a base-10 integer.
// Returns false (leaving `value` untouched) when the text is empty, has
// trailing characters, or does not fit in an int.
static bool parseInt(const char *text, int &value) {
    errno = 0;
    char *end = nullptr;
    const long parsed = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE ||
        parsed < INT_MIN || parsed > INT_MAX) {
        return false;
    }
    value = static_cast<int>(parsed);
    return true;
}

// Lists every prime number up to N using an odd-only Sieve of Eratosthenes
// whose marking loop is parallelized with OpenMP.
//
// Usage: ./program P N [-o outputFile]
//   P              number of OpenMP threads to use (>= 1)
//   N              inclusive upper limit of the search (>= 3)
//   -o outputFile  write the primes to outputFile instead of standard output
//
// The elapsed sieve time is always reported on standard output.
// Returns 0 on success and 1 on a usage, argument or file error.
int main(int argc, char *argv[]) {
    // parse command line arguments
    if (argc < 3) {
        cerr << "Usage: ./program P N [-o outputFile]" << endl;
        return 1;
    }

    int numProcs = 0;
    if (!parseInt(argv[1], numProcs) || numProcs < 1) {
        cerr << "Error: Invalid number of processors." << endl;
        return 1;
    }

    int size = 0;
    if (!parseInt(argv[2], size) || size < 3) {
        cerr << "Error: Invalid problem size." << endl;
        return 1;
    }

    bool outputToFile = false;
    string outputFile;
    if (argc > 3 && string(argv[3]) == "-o") {
        // "-o" must be followed by a file name; argv[4] does not exist otherwise.
        if (argc < 5) {
            cerr << "Usage: ./program P N [-o outputFile]" << endl;
            return 1;
        }
        outputToFile = true;
        outputFile = argv[4];
    }

    // Slot k stands for the odd number 2*k + 3, so the table covers exactly
    // the odd numbers in [3, size]. One byte per slot (not vector<bool>):
    // bit-packed slots share a word, so concurrent writes to neighbouring
    // slots would race.
    const int oddCount = (size - 1) / 2;
    vector<char> isPrime(static_cast<size_t>(oddCount), 1);

    // set up OpenMP
    omp_set_num_threads(numProcs);
    const double start_time = omp_get_wtime();

    const long long limit = size;
    // The candidate factors are walked sequentially so that each "is i still
    // prime?" read happens after all earlier marking has completed; only the
    // marking of i's multiples is shared among the threads. 64-bit counters
    // keep i*i and j += 2*i from overflowing for limits close to INT_MAX.
    for (long long i = 3; i * i <= limit; i += 2) {
        if (!isPrime[static_cast<size_t>((i - 3) / 2)]) {
            continue;
        }
        const long long first = i * i;
        const long long step = 2 * i;  // skip the even multiples
        #pragma omp parallel for schedule(static)
        for (long long j = first; j <= limit; j += step) {
            isPrime[static_cast<size_t>((j - 3) / 2)] = 0;
        }
    }

    const double end_time = omp_get_wtime();

    // print or output results
    ofstream file;
    if (outputToFile) {
        file.open(outputFile);
        if (!file) {
            cerr << "Error: Cannot open output file." << endl;
            return 1;
        }
    }
    ostream &out = outputToFile ? static_cast<ostream &>(file) : cout;

    out << 2 << '\n';  // the only even prime is not stored in the table
    for (int k = 0; k < oddCount; k++) {
        if (isPrime[static_cast<size_t>(k)]) {
            out << 2LL * k + 3 << '\n';
        }
    }

    if (outputToFile) {
        file.close();
        if (!file) {
            cerr << "Error: Failed to write output file." << endl;
            return 1;
        }
    }

    cout << "Time elapsed: " << end_time - start_time << " seconds" << endl;
    return 0;
}
ScreenShot
Question-2
Parallelize the following piece of code using OpenMP with SIZE=1000. Report the sequential
execution time, the parallel execution time (when using 2 threads, 3 threads and 4 threads), and
the achieved speedup (when using 2 threads, 3 threads, and 4 threads) cont…..
CODE
#include <omp.h>
#include <stdio.h>
#define SIZE 1000
/*
 * Returns (n * n * 1000 / divisor) % 1000.
 * The product is evaluated in long long so it cannot overflow int if SIZE is
 * ever raised; for SIZE = 1000 the result equals the plain int expression.
 */
static int scaled_square(int n, int divisor) {
    return (int)(((long long)n * n * 1000 / divisor) % 1000);
}

/*
 * Counts the (i, j, k) triples in [0, SIZE)^3 for which x + y + z equals key,
 * where x, y and z are derived from i, j and k respectively, and reports the
 * count together with the wall-clock time of the OpenMP-parallel search.
 * Returns 0.
 */
int main() {
    const int key = 111;
    long win = 0;
    double start_time, end_time;

    start_time = omp_get_wtime();
    /*
     * All loop counters and x/y/z are declared inside the loops, which makes
     * them private to each thread. Only the outer loop variable of a
     * "parallel for" is privatized automatically; function-scope j and k
     * would be shared between threads and corrupt the count.
     */
    #pragma omp parallel for reduction(+:win) num_threads(4)
    for (int i = 0; i < SIZE; i++) {
        const int x = scaled_square(i, 35); /* depends on i only */
        for (int j = 0; j < SIZE; j++) {
            const int y = scaled_square(j, 36); /* depends on j only */
            for (int k = 0; k < SIZE; k++) {
                const int z = scaled_square(k, 37);
                if (key == (x + y + z)) {
                    win += 1;
                }
            }
        }
    }
    end_time = omp_get_wtime();

    printf("total wins=%ld\n", win);
    printf("Time taken = %lf seconds\n", end_time - start_time);
    return 0;
}
Output
Question-3
Implement a simple parallel linear search algorithm using OpenMP. The program works on an
integer array of size ‘N’ filled with random numbers and a key value to be searched for. Write the
parallel program so that it divides the search space among multiple threads. Your program should
output the total number of occurrences of the key found in the array. cont….
Code
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>
#define N 1000000
/*
 * Fills an array of N integers with rand() values, draws one more rand()
 * value as the search key, and counts in parallel how many array elements
 * equal the key.
 * Returns 0 on success, 1 if the array cannot be allocated.
 */
int main() {
    int key, count = 0;

    /*
     * Heap allocation: N ints (about 4 MB for N = 1000000) as an automatic
     * array can exceed the default stack size on some platforms.
     */
    int *a = malloc((size_t)N * sizeof *a);
    if (a == NULL) {
        fprintf(stderr, "Error: could not allocate %d integers\n", N);
        return 1;
    }

    // Fill the array with random numbers
    for (int i = 0; i < N; i++) {
        a[i] = rand();
    }

    // Generate a random key
    key = rand();

    // Each thread scans its share of the array and keeps a private tally;
    // the reduction adds the tallies together when the loop ends.
    #pragma omp parallel for reduction(+:count)
    for (int i = 0; i < N; i++) {
        if (a[i] == key) {
            count++;
        }
    }

    printf("Total occurrences of key %d in the array: %d\n", key, count);

    free(a);
    return 0;
}
Output Screenshot
Question No. 4:
In this question you have to experiment with matrix multiplication using OpenMP. The
size of the N x N matrices in the program should be 100 x 100. Your task is to parallelize this
matrix multiplication program in different ways (coarse grain, fine grain) and report the
execution time for each program. You can call the function omp_get_wtime() at the beginning
and end of the program to find the elapsed time. The function omp_get_wtime() returns a
double value….cont..
Code
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>
#define N 100
/*
 * Coarse-grained product c = a * b of row-major N x N matrices: the unit of
 * parallel work is one whole row of c.
 */
static void multiply_coarse(const double *a, const double *b, double *c) {
    #pragma omp parallel for default(none) shared(a, b, c)
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            double sum = 0.0;
            for (int k = 0; k < N; k++) {
                sum += a[i*N+k] * b[k*N+j];
            }
            c[i*N+j] = sum;
        }
    }
}

/*
 * Fine-grained product c = a * b with static scheduling: the i and j loops
 * are collapsed so the unit of parallel work is a single element of c, and
 * the N*N elements are divided among the threads up front.
 */
static void multiply_fine_static(const double *a, const double *b, double *c) {
    #pragma omp parallel for collapse(2) schedule(static) default(none) shared(a, b, c)
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            double sum = 0.0;
            for (int k = 0; k < N; k++) {
                sum += a[i*N+k] * b[k*N+j];
            }
            c[i*N+j] = sum;
        }
    }
}

/*
 * Fine-grained product c = a * b with dynamic scheduling: same per-element
 * decomposition as the static variant, but elements are handed to threads
 * on demand while the loop runs.
 */
static void multiply_fine_dynamic(const double *a, const double *b, double *c) {
    #pragma omp parallel for collapse(2) schedule(dynamic) default(none) shared(a, b, c)
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            double sum = 0.0;
            for (int k = 0; k < N; k++) {
                sum += a[i*N+k] * b[k*N+j];
            }
            c[i*N+j] = sum;
        }
    }
}

/*
 * Multiplies two N x N matrices three times (coarse-grained, fine-grained
 * with static scheduling, fine-grained with dynamic scheduling) and prints
 * the wall-clock time of each run.
 *
 * Every run stores a complete, freshly computed product into C, so the runs
 * are independent of one another and each timing covers the same amount of
 * work. The command-line arguments are not used.
 * Returns 0 on success, 1 if a matrix cannot be allocated.
 */
int main(int argc, char** argv) {
    double start_time, end_time;
    (void)argc;
    (void)argv;

    // Allocate memory for matrices A, B, and C
    double *A = malloc((size_t)N * N * sizeof *A);
    double *B = malloc((size_t)N * N * sizeof *B);
    double *C = malloc((size_t)N * N * sizeof *C);
    if (A == NULL || B == NULL || C == NULL) {
        fprintf(stderr, "Error: could not allocate the matrices\n");
        free(A);
        free(B);
        free(C);
        return 1;
    }

    // Initialize matrices A and B
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            A[i*N+j] = i + j;
            B[i*N+j] = i - j;
            C[i*N+j] = 0.0;
        }
    }

    // Course-grained parallelization
    start_time = omp_get_wtime();
    multiply_coarse(A, B, C);
    end_time = omp_get_wtime();
    printf("Course-grained parallelization execution time: %f seconds\n", end_time - start_time);

    // Fine-grained parallelization
    start_time = omp_get_wtime();
    multiply_fine_static(A, B, C);
    end_time = omp_get_wtime();
    printf("Fine-grained parallelization execution time with static scheduling: %f seconds\n",
           end_time - start_time);

    // Fine-grained parallelization with dynamic scheduling
    start_time = omp_get_wtime();
    multiply_fine_dynamic(A, B, C);
    end_time = omp_get_wtime();
    printf("Fine-grained parallelization execution time with dynamic scheduling: %f seconds\n",
           end_time - start_time);

    // Free memory
    free(A);
    free(B);
    free(C);
    return 0;
}
Download