--author Giovanni Agosta 
--title Threads
--date today
Brief overview of threads and related operations
(ALP chapter 4)
--newpage agenda
--heading Agenda


--newpage summary1
--heading Summary of Threads Operations
Creation and destruction
--beginoutput
int  pthread_create(pthread_t  *  thread, pthread_attr_t * attr, 
                    void * (*start_routine)(void *), void * arg);

void pthread_exit(void *retval);

int pthread_join(pthread_t th, void **thread_return);

int pthread_cancel(pthread_t thread);
--endoutput

  * pthread_exit is equivalent to returning from the start_routine
  * pthread_join waits for termination of a given thread and reads its return value

--newpage summary2
--heading Summary of Threads Operations

Thread ID comparison

--beginoutput
pthread_t pthread_self(void);

int pthread_equal(pthread_t thread1, pthread_t thread2);
--endoutput

--newpage cancel1
--heading Synchronous and Asynchronous Threads
Thread cancellation can be:
  * PTHREAD_CANCEL_ASYNCHRONOUS: cancelable at any point of its execution
  * PTHREAD_CANCEL_DEFERRED: cancelable only at cancellation points

--beginoutput
int pthread_setcancelstate(int state, int *oldstate);

int pthread_setcanceltype(int type, int *oldtype);
--endoutput

Thread cancellation states:
  * PTHREAD_CANCEL_ENABLE
  * PTHREAD_CANCEL_DISABLE
Disabling cancellation can be used to implement critical sections
	
--newpage cancel2
--heading Cancellation Points
Explicit:
  * pthread_testcancel

Implicit:
  * pthread_join
  * pthread_cond_wait
  * pthread_cond_timedwait
  * sem_wait
  * sigwait

Missing:
  * POSIX specifies other cancellation points, not yet implemented in Linux threads

--newpage tsdata
--heading Thread-Specific Data
  * Threads of the same process share the same address space
    - Global variables are shared
    - Heap memory is shared
  * Each thread has its own stack
    - Automatic variables are private, though they can still be accessed 
      via explicit pointers
    - Thread-specific data can also be explicitly declared
--beginoutput
int pthread_key_create(pthread_key_t *key, 
                       void (*destr_function) (void *));
int pthread_key_delete(pthread_key_t key);
int pthread_setspecific(pthread_key_t key, const void *pointer);
void * pthread_getspecific(pthread_key_t key);
--endoutput

--newpage cleanup
--heading Cleanup Handlers
Provide a mechanism to handle cleanup in case of termination via pthread_exit or pthread_cancel

--beginoutput
void pthread_cleanup_push(void (*routine) (void *), void *arg);

void pthread_cleanup_pop(int execute);

void pthread_cleanup_push_defer_np(void (*routine) (void *), void *arg);

void pthread_cleanup_pop_restore_np(int execute);
--endoutput

--newpage racecond
--heading Race Conditions and Mutexes
Problem: multiple threads accessing (reading, then writing) the same data item may cause loss of information
Solution: atomic access to shared resources

--beginoutput
int pthread_mutex_init(pthread_mutex_t  *mutex,  
                        const pthread_mutexattr_t *mutexattr);
int pthread_mutex_destroy(pthread_mutex_t *mutex);

int pthread_mutex_lock(pthread_mutex_t *mutex);
int pthread_mutex_trylock(pthread_mutex_t *mutex);
int pthread_mutex_unlock(pthread_mutex_t *mutex);
--endoutput


--newpage mutexdeadlock
--heading Mutex Deadlock
If a thread requests multiple locks on the same mutex, it may go into deadlock
--beginoutput
pthread_mutex_t fastmutex = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t recmutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
pthread_mutex_t errchkmutex = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP;
--endoutput
  * Fast mutex (default!): a thread that locks a mutex it already holds
    blocks forever waiting for its release (deadlock)
  * Recursive mutex: can be locked by the same thread several times
  * Error checking mutex: detects and reports double locks

--newpage mutexattr
--heading Setting Mutex Properties
Mutex kind can be changed with the following procedure:
--beginoutput
pthread_mutexattr_t attr;
pthread_mutex_t mutex;

pthread_mutexattr_init (&attr);
pthread_mutexattr_setkind_np (&attr, PTHREAD_MUTEX_ERRORCHECK_NP);
pthread_mutex_init (&mutex, &attr);
pthread_mutexattr_destroy (&attr);
--endoutput
--boldon
  -> Note that error checking and recursive mutexes are non-portable
--boldoff

--newpage semaphores
--heading Semaphores
  * Mutexes only guarantee mutual exclusion
  * Polling of resource status is still needed
  * Semaphores solve this issue by providing 
    a way to synchronize on a shared counter

--beginoutput
int sem_init(sem_t *sem, int pshared, unsigned int value);

int sem_wait(sem_t *sem);
int sem_trywait(sem_t *sem);
int sem_timedwait(sem_t *sem, const struct timespec *abs_timeout);
int sem_getvalue(sem_t *sem, int *sval);

int sem_post(sem_t *sem);
--endoutput

--newpage condvar
--heading Condition Variables
  * Address a problem similar to that solved by semaphores
  * Condition variables have no memory
  * If a condition is signalled when no one is waiting 
    on it, the signal is lost
--beginoutput
int pthread_cond_init(pthread_cond_t *cond, pthread_condattr_t *cond_attr);
int pthread_cond_destroy(pthread_cond_t *cond);

int pthread_cond_signal(pthread_cond_t *cond);
int pthread_cond_broadcast(pthread_cond_t *cond);

int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex);
int pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t
                           *mutex, const struct timespec *abstime);
--endoutput

--newpage condvar2
--heading Waiting on Condition Variables

  * Each condition variable needs an associated mutex
  * The mutex is initialized separately
	
  * The mutex must be locked before calling pthread_cond_wait
  * pthread_cond_wait unlocks the mutex and blocks
  * When the condition is signalled, the mutex is reacquired 
    and the waiting thread unblocks
		

--newpage condvar3
--heading Signalling Condition Variables

  * pthread_cond_signal unblocks one blocked thread
  * pthread_cond_broadcast unblocks all blocked threads

  * Before calling either function, the corresponding mutex
    must be locked
  * After calling these functions, the corresponding mutex
    must be unlocked

--newpage linuxthreads
--heading Linux Threads Implementations

  * LinuxThreads: original implementation, not supported since glibc 2.4
  * Native POSIX Threading Library: current implementation
    - More conformance with POSIX
    - Still missing unified niceness value

  * NPTL employs:
--beginoutput
int clone(int (*fn)(void *), void *child_stack,
          int flags, void *arg, ...);

int futex(int *uaddr, int op, int val, const struct timespec *timeout,
          int *uaddr2, int val3);
--endoutput

--newpage clone
--heading The clone System Call

--beginoutput
syscall(int SYS_clone, void *child_stack, int flags)
--endoutput
 
  * Flags define what the child inherits from the parent
    and which memory regions are shared
 
  * child_stack is the topmost address of the stack for the child
 
  * The clone function wraps the syscall, handling:
    - The function executed by the child process
    - Its arguments

--newpage futex1
--heading Fast Userspace Locking and the futex System Call
--beginoutput
syscall(SYS_futex, void *addr1, int op, int val1, 
        struct timespec *timeout, void *addr2, int val3)
--endoutput
  * The futex mechanism is based on atomically accessible 
    variables (*uaddr), similar to semaphore counters
  * futex variables are handled in userspace until lock 
    contention arises
  * The futex system call allows
    - Waiting for a value change at a given memory address 
    - Waking up anyone waiting at a given memory address 
  * Operations (op) include 
    - FUTEX_WAIT: suspend (possibly until a specified <timeout>)
    - FUTEX_WAKE: wake up <val> waiting threads
    - FUTEX_WAKE_OP: conditional wake up 

--newpage futex2
--heading Fast Userspace Locking and the futex System Call

  * futexes are a rather general mechanism

  * Can be used to implement all high-level synchronization and mutual 
    exclusion primitives (semaphores, mutexes, condition variables)

  * They are also very low level, and require a (slow) system call

  * Thus, most of the work is actually performed in userspace
