Week 9 Power Point Slides

Pthreads – Create and Join
IEEE Portable Operating System Interface Standard (POSIX)
• Spawn an attached thread
pthread_create(&thread1, NULL, proc1, &arg)
pthread_join(thread1, status)
Thread execution
void *proc1(void *arg)
// Thread code
• Detached threads
– Join is not needed
– The OS destroys thread
resources when they terminate
– A parameter in the create
call indicates a detached thread
Note: The Pthreads library must be available : #include <pthread.h>
Executing a Thread
• Declare a lock: pthread_mutex_t mutex;
• Declare a mutex attribute: pthread_mutexattr_t mta;
• Initializing an attribute (spin_only, limited_spin, no_spin, recursive, metered)
pthread_mutexattr_settype(&mta, PTHREAD_MUTEX_RECURSIVE);
pthread_mutexattr_setname_np(&mta, "My Mutex");
• Initialize a mutex
pthread_mutex_init(&mutex, NULL); // Use defaults
pthread_mutex_init(&mutex, &mta); // or use designated attributes
• Enter and release
pthread_mutex_lock(&mutex); and pthread_mutex_unlock(&mutex);
• Try Lock without block: pthread_mutex_trylock(&mutex);
• Release resources
pthread_mutex_destroy(&mutex); and pthread_mutexattr_destroy(&mta);
Required: #include <semaphore.h>
Semaphores are not part of Pthreads
• Initialize
int sem_init ( sem_t *sem,
int shared, /* non-zero to share between processes */
unsigned initial_val ) ; /* one works like a mutex */
• Destroy semaphore: int sem_destroy ( sem_t *sem ) ;
• Post (V): int sem_post ( sem_t *sem ) ;
• Wait (P): int sem_wait ( sem_t *sem ) ;
Semaphores can count up or down and can start with any non-negative integer value
Condition Variables
while (c <> 0)
if (c == 0)
Note: Signals are missed if a thread is not waiting when they are sent
Read-Write Locks
More than one reader is allowed; however, writing is exclusive
• Initialize
int pthread_rwlock_init ( pthread_rwlock_t *readWriteLock ,
const pthread_rwlockattr_t *attr_p ) ;
• Locking
int pthread_rwlock_rdlock( pthread_rwlock_t *readWriteLock ) ;
int pthread_rwlock_wrlock( pthread_rwlock_t *readWriteLock ) ;
• Unlock: int pthread_rwlock_unlock( pthread_rwlock_t *readWriteLock ) ;
• Destroy: int pthread_rwlock_destroy ( pthread_rwlock_t *readWriteLock ) ;
Practical Example: Multithreaded linked lists or binary trees
Hello World: Pthreads
Make sure to include: <stdio.h>, <stdlib.h>, and <pthread.h>
void* Hello ( void* myRank )
{ printf ( "Hello from thread %ld\n" , (long) myRank ) ;
return NULL ;
}
int main ( int argc , char* argv [ ] )
{ long t;
pthread_t* threadHandles ;
int threads = strtol ( argv [ 1 ] , NULL , 10 ) ;
threadHandles = malloc ( threads * sizeof( pthread_t ) ) ;
for ( t = 0 ; t < threads; t ++ )
pthread_create(&threadHandles [ t ] , NULL , Hello , ( void *) t ); /* pass the rank by value: &t would race with the loop */
printf ( "Hello from the main thread\n" ) ;
for ( t = 0 ; t < threads; t ++ ) pthread_join ( threadHandles [ t ] , NULL ) ;
free( threadHandles ) ;
return 0 ;
}
Sequential Version
for ( r = 0 ; r < m ; r++)
{ y[r] = 0.0 ;
for ( c = 0 ; c < n ; c++)
y[r] += A[r][c] * x[c] ;
}
Pthreads Version
void* matrixMult( void *rank )
{ int r, c, myM = m / (int)threads ;
long myRank = (long) rank ;
int startRow = myRank * myM ;
int lastRow = (myRank+1) * myM - 1 ;
for ( r = startRow ; r <= lastRow ; r++)
{ y[r] = 0.0 ;
for ( c = 0 ; c < n ; c++)
y[r] += A[r][c] * x[c] ;
}
return NULL ;
}
Even number of rows per processor and m, n, threads are global variables
Note: Works because we don't alter the original data
Calculate π
Calculation of π
• π = 4(1-1/3+1/5-1/7+ … )
• Sequential version
double factor = 1.0, sum = 0.0;
for (i=0; i<n; i++, factor = -factor)
{ sum += factor/(2*i+1); }
pi = 4*sum;
Failed Pthread version
void* Thread_sum ( void *rank )
{ long myRank = ( long ) rank ;
double factor ;
long long i, myN = n/threads;
long long first = myN*myRank;
long long last = first+myN;
if ( first % 2 == 0) factor = 1.0 ;
else factor = -1.0;
for ( i=first; i<last ; i++, factor = -factor )
{ sum += factor / ( 2*i + 1 ) ; }
return NULL ;
}
The statement that updates sum is a critical section
Busy Wait Solutions
Update sum in the loop
void* Thread_sum ( void *rank )
{ long myRank = ( long ) rank ;
double factor; /* sum is the shared global, not a local */
long long i, myN = n/threads;
long long first = myN*myRank;
if ( first % 2 == 0) factor = 1.0;
else factor = -1.0;
for (i=first; i<first+myN; i++, factor=-factor)
{ while (flag != myRank);
sum += factor / ( 2*i + 1 ) ;
flag = (flag+1)%threads;
}
return NULL ;
}
Sum after the loop
void* Thread_sum ( void *rank )
{ long myRank = ( long ) rank ;
double factor, mySum=0.0;
long long i, myN = n/threads;
long long first = myN*myRank;
long long last = first + myN;
if ( first % 2 == 0) factor = 1.0 ;
else factor = -1.0;
for (i=first; i<last ; i++, factor=-factor)
{ mySum += factor / ( 2*i + 1 ) ; }
while (flag != myRank);
sum += mySum;
flag = (flag+1)%threads;
return NULL ;
}
MUTEX Solution
void* Thread_sum ( void *rank )
{ long myRank = ( long ) rank ;
double factor, mySum=0;
long long i, myN = n/threads;
long long first = myN*myRank, last = first+myN;
if ( first % 2 == 0) factor = 1.0 ;
else factor = -1.0;
for (i=first; i<last ; i++, factor = -factor)
{ mySum += factor / ( 2*i + 1 ) ; }
pthread_mutex_lock(&mutex);
sum += mySum;
pthread_mutex_unlock(&mutex);
return NULL ;
}
Time Comparison
Busy wait slower, but order of execution is deterministic
Sending Messages to Threads
Each Processor stores a message to the next array index
Solution with Mutexes is not obvious
Initialize the Semaphores to 0 (locked), then unlock before lock
Failure: Critical Section
void* Send_msg ( void* arg )
{ long rank = ( long ) arg ;
long to = (rank+1)%threads ;
char *msg = malloc (MAX*sizeof (char)) ;
sprintf (msg, "%ld --> %ld", rank, to) ;
msgs[ to ] = msg ;
printf ("%ld>%s\n", rank, msgs[rank]) ;
return NULL ;
}
Solution with Semaphores
void* Send_msg ( void* arg )
{ long rank = ( long ) arg ;
long to = (rank+1)%threads ;
char *msg = malloc(MAX*sizeof(char)) ;
sprintf (msg, "%ld --> %ld", rank, to) ;
msgs[ to ] = msg ;
sem_post(&semaphores [ to ] ) ;
sem_wait(&semaphores [ rank ] ) ;
printf ("%ld > %s\n", rank, msgs[rank] ) ;
return NULL ;
}
Note: Pthreads does
provide for barriers
Barriers: Condition variables
/* Shared */
int counter = 0 ;
pthread_mutex_t mtx ;
pthread_cond_t cnd ;
/* . . . Thread work */
/* Start of the barrier */
pthread_mutex_lock(&mtx ) ;
counter ++;
if ( counter == threads)
{ counter = 0 ;
pthread_cond_broadcast(&cnd ) ;
} else
{ while (pthread_cond_wait(&cnd,&mtx)!=0); }
pthread_mutex_unlock(&mtx ) ;
/* More thread work . . . */
Barriers: Mutex
int thread_count ;
pthread_mutex_t mtx;
/* . . . Thread work */
/* Start of the barrier */
pthread_mutex_lock(&mtx ) ;
counter ++;
pthread_mutex_unlock(&mtx) ;
while ( counter < thread_count ) ; // Inefficient (busy-wait )
/* More thread work . . . */
Read-Write Locks and Lists
• Accessing the list
pthread_rwlock_rdlock(&rwlock ) ;
entry = Find( key ) ;
pthread_rwlock_unlock(&rwlock ) ;
• Inserting into the list
pthread_rwlock_wrlock(&rwlock ) ;
Insert ( entry ) ;
pthread_rwlock_unlock(&rwlock ) ;
• Removing from the list
pthread_rwlock_wrlock(&rwlock ) ;
Delete ( entry ) ;
pthread_rwlock_unlock(&rwlock ) ;