Pthreads – Create and Join
IEEE Portable Operating System Interface Standard (POSIX)
• Spawn an attached thread
    pthread_create(&thread1, NULL, proc1, &arg);
    . . .
    pthread_join(thread1, status);
• Thread execution
    void *proc1(void *arg) { /* Thread code */ return(*status); }
• Detached threads
  – Join is not needed
  – The OS destroys thread resources when they terminate
  – A parameter (the thread attribute) in the create call indicates a detached thread
Note: The Pthreads library must be available: #include <pthread.h>
Executing a Thread
Locks
• Declare a lock: pthread_mutex_t mutex;
• Declare a mutex attribute: pthread_mutexattr_t mta;
• Initializing an attribute (spin_only, limited_spin, no_spin, recursive, metered)
    pthread_mutexattr_init(&mta);
    pthread_mutexattr_settype(&mta, PTHREAD_MUTEX_RECURSIVE);
    pthread_mutexattr_setname_np(&mta, "My Mutex");
• Initialize a mutex
    pthread_mutex_init(&mutex, NULL);  // Use defaults
    pthread_mutex_init(&mutex, &mta);  // or use designated attributes
• Enter and release: pthread_mutex_lock(&mutex); and pthread_mutex_unlock(&mutex);
• Try to lock without blocking: pthread_mutex_trylock(&mutex);
• Release resources: pthread_mutex_destroy(&mutex); and pthread_mutexattr_destroy(&mta);
Semaphores
Required: #include <semaphore.h>
Semaphores are not part of Pthreads
• Initialize
    int sem_init(sem_t *sem,
                 int shared,             /* non-zero to share between processes */
                 unsigned initial_val);  /* one works like a mutex */
• Destroy semaphore: int sem_destroy(sem_t *sem);
• Post (V): int sem_post(sem_t *sem);
• Wait (P): int sem_wait(sem_t *sem);
Semaphores can count up or down and can start with any non-negative integer value
Condition Variables
action() {
    ..
    pthread_mutex_lock(&mutex);
    while (c != 0) pthread_cond_wait(&cond, &mutex);
    pthread_mutex_unlock(&mutex);
    take_action();
    ..
}
counter() {
    ..
    pthread_mutex_lock(&mutex);
    c--;
    if (c == 0) pthread_cond_signal(&cond);
    ..
    pthread_mutex_unlock(&mutex);
    ..
}
Note: Signals are missed if a thread is not waiting when they are sent
Read-Write Locks
More than one reader is allowed at a time; however, writing is exclusive
• Initialize
    int pthread_rwlock_init(pthread_rwlock_t *readWriteLock, const pthread_rwlockattr_t *attr_p);
• Locking
    int pthread_rwlock_rdlock(pthread_rwlock_t *readWriteLock);
    int pthread_rwlock_wrlock(pthread_rwlock_t *readWriteLock);
• Unlock: int pthread_rwlock_unlock(pthread_rwlock_t *readWriteLock);
• Destroy: int pthread_rwlock_destroy(pthread_rwlock_t *readWriteLock);
Practical Example: Multithreaded linked lists or binary trees
Hello World: Pthreads
Make sure to include: <stdio.h>, <stdlib.h>, and <pthread.h>
void* Hello(void* myRank) {
    printf("Hello from thread %ld\n", (long)myRank);
    return NULL;
}
int main(int argc, char *argv[]) {
    long t;
    pthread_t *threadHandles;
    int threads = strtol(argv[1], NULL, 10);
    threadHandles = malloc(threads * sizeof(pthread_t));
    for (t = 0; t < threads; t++)
        /* Pass the rank by value: passing &t would race with the loop that keeps changing t */
        pthread_create(&threadHandles[t], NULL, Hello, (void *)t);
    printf("Hello from the main thread\n");
    for (t = 0; t < threads; t++)
        pthread_join(threadHandles[t], NULL);
    free(threadHandles);
    return 0;
}
Matrix Multiplication
Pthreads Version
void* matrixMult(void *rank) {
    int r, c, myM = m / (int)threads;
    long myRank = (long)rank;
    int startRow = myRank * myM;
    int lastRow = (myRank+1) * myM - 1;
    for (r = startRow; r <= lastRow; r++) {
        y[r] = 0.0;
        for (c = 0; c < n; c++)
            y[r] += A[r][c] * x[c];
    }
    return NULL;
}
Sequential Version
    for (r = 0; r < m; r++) {
        y[r] = 0.0;
        for (c = 0; c < n; c++)
            y[r] += A[r][c] * x[c];
    }
Assumption: The rows divide evenly among the processors (m is a multiple of threads), and m, n, threads are global variables
Note: Works without locks because we don't alter the original data (A and x are only read; each thread writes only its own rows of y)
Calculate π
Calculation of π
• π = 4(1 - 1/3 + 1/5 - 1/7 + …)
• Sequential version
    double factor = 1.0, sum = 0.0;
    for (i = 0; i < n; i++, factor = -factor) {
        sum += factor / (2*i + 1);
    }
    pi = 4*sum;
Failed Pthread version
void* Thread_sum(void *rank) {
    long myRank = (long)rank;
    double factor;
    long long i, myN = n/threads;
    long long first = myN*myRank;
    long long last = first+myN;
    if (first % 2 == 0) factor = 1.0;
    else factor = -1.0;
    for (i = first; i < last; i++, factor = -factor) {
        sum += factor / (2*i + 1);
    }
    return NULL;
}
The statement that updates sum is a critical section
Busy Wait Solutions
Update sum in the loop
void* Thread_sum(void *rank) {
    long myRank = (long)rank;
    double factor;    /* sum is the shared global; a local sum would hide it */
    long long i, myN = n/threads;
    long long first = myN*myRank;
    long long last = first + myN;
    if (first % 2 == 0) factor = 1.0;
    else factor = -1.0;
    for (i = first; i < last; i++, factor = -factor) {
        while (flag != myRank);
        sum += factor / (2*i + 1);
        flag = (flag+1) % threads;
    }
    return NULL;
}
Sum after the loop
void* Thread_sum(void *rank) {
    long myRank = (long)rank;
    double factor, mySum = 0.0;
    long long i, myN = n/threads;
    long long first = myN*myRank;
    if (first % 2 == 0) factor = 1.0;
    else factor = -1.0;
    for (i = first; i < first+myN; i++, factor = -factor) {
        mySum += factor / (2*i + 1);
    }
    while (flag != myRank);
    sum += mySum;
    flag = (flag+1) % threads;
    return NULL;
}
MUTEX Solution
void* Thread_sum(void *rank) {
    long myRank = (long)rank;
    double factor, mySum = 0;
    long long i, myN = n/threads;
    long long first = myN*myRank, last = first+myN;
    if (first % 2 == 0) factor = 1 . 
0 ; else factor = -1.0;
    for (i = first; i < last; i++, factor = -factor) {
        mySum += factor / (2*i + 1);
    }
    pthread_mutex_lock(&mutex);
    sum += mySum;
    pthread_mutex_unlock(&mutex);
    return NULL;
}
Time Comparison
Busy wait is slower, but the order of execution is deterministic
Sending Messages to Threads
Each Processor stores a message to the next array index
Solution with Mutexes is not obvious
Initialize the Semaphores to 0 (locked), then unlock (sem_post) before lock (sem_wait)
Failure: Critical Section
void* Send_msg(void* arg) {
    long rank = (long)arg;
    long to = (rank+1) % threads;
    char *msg = malloc(MAX*sizeof(char));
    sprintf(msg, "%ld --> %ld", rank, to);
    msgs[to] = msg;
    printf("%ld>%s\n", rank, msgs[rank]);    /* msgs[rank] may not have been stored yet */
    return NULL;
}
Solution with Semaphores
void* Send_msg(void* arg) {
    long rank = (long)arg;
    long to = (rank+1) % threads;
    char *msg = malloc(MAX*sizeof(char));
    sprintf(msg, "%ld --> %ld", rank, to);
    msgs[to] = msg;
    sem_post(&semaphores[to]);
    sem_wait(&semaphores[rank]);
    printf("%ld > %s\n", rank, msgs[rank]);
    return NULL;
}
Barriers
Note: Pthreads does provide for barriers (pthread_barrier_init / pthread_barrier_wait); the versions below build one by hand
Barriers: Condition variables
    /* Shared */
    int counter = 0;
    pthread_mutex_t mtx;
    pthread_cond_t cnd;
    /* . . . Thread work */
    /* Start of the barrier */
    pthread_mutex_lock(&mtx);
    counter++;
    if (counter == threads) {
        counter = 0;
        pthread_cond_broadcast(&cnd);
    } else {
        while (pthread_cond_wait(&cnd, &mtx) != 0);
    }
    pthread_mutex_unlock(&mtx);
    /* More thread work . . . */
Barriers: Mutex
    int thread_count;
    pthread_mutex_t mtx;
    /* . . . Thread work */
    /* Start of the barrier */
    pthread_mutex_lock(&mtx);
    counter++;
    pthread_mutex_unlock(&mtx);
    while (counter < thread_count);    // Inefficient (busy-wait)
    ...
    /* More thread work . . . 
*/
Read-Write Locks and Lists
• Accessing the list
    pthread_rwlock_rdlock(&rwlock);
    entry = Find(key);
    pthread_rwlock_unlock(&rwlock);
• Inserting into the list
    pthread_rwlock_wrlock(&rwlock);
    Insert(entry);
    pthread_rwlock_unlock(&rwlock);
• Removing from the list
    pthread_rwlock_wrlock(&rwlock);
    Delete(entry);
    pthread_rwlock_unlock(&rwlock);