How to have a FIFO thread mutex in C
Hello,
I want to write a multi-threaded program whose threads access a shared resource, and I want a FIFO mutex on that resource: whichever thread blocks on the mutex first should be unblocked first. Is this possible with pthreads? |
I don't think it's directly possible with pthread mutexes; however, you can fake it with some pthread barriers, conditions, and mutexes. I was curious about the problem, so I came up with a working solution.
Code:
#include <stdio.h>
Kevin Barry |
I believe futexes (mutexes provided by the Linux kernel) work that way by default -- the futex is granted in the same order it was requested.
You can also build a FIFO mutex using pthreads, if you really need such a type. (Why would you, really?) You'll need a normal mutex, a condition variable (to wake up all waiters via broadcast), and two counters. In C: Code:
typedef struct fifo_mutex fifo_mutex_t;
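/* Sketch of the scheme described above -- a "ticket lock" built from one
 * mutex, one condition variable, and two counters. The field and function
 * names here are illustrative, not the original listing. */
#include <pthread.h>

struct fifo_mutex {
    pthread_mutex_t mutex;    /* protects the two counters below */
    pthread_cond_t  wait;     /* broadcast on every unlock */
    unsigned long   next;     /* next ticket number to hand out */
    unsigned long   serving;  /* ticket number allowed to proceed */
};

#define FIFO_MUTEX_INITIALIZER \
        { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0UL, 0UL }

int fifo_mutex_lock(fifo_mutex_t *const f)
{
    pthread_mutex_lock(&f->mutex);
    const unsigned long ticket = f->next++;      /* take a place in line */
    while (ticket != f->serving)                 /* not our turn yet? */
        pthread_cond_wait(&f->wait, &f->mutex);  /* atomically unlock + sleep */
    return pthread_mutex_unlock(&f->mutex);
}

int fifo_mutex_unlock(fifo_mutex_t *const f)
{
    pthread_mutex_lock(&f->mutex);
    f->serving++;                                /* admit the next ticket */
    pthread_cond_broadcast(&f->wait);            /* wake everyone to re-check */
    return pthread_mutex_unlock(&f->mutex);
}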
The above is not susceptible to thread starvation, although the "cost" of acquiring and releasing the mutex will increase as the number of threads blocking on it increases. (If heavily contended, it does suffer from the "thundering herd" problem, since every thread blocking on it will have to be briefly woken up to find the next thread, whenever the mutex changes state.) If uncontended, the above is just two additions "heavier" than a normal mutex.

If a thread is cancelled while blocking on the mutex (waiting on the condition variable), the mutex will end up deadlocking. You can avoid that by changing the thread cancelability state temporarily, before starting to wait on the condition variable, and restoring the state when the mutex has been grabbed.

There are of course totally different approaches, for example a queue or chain of threads, where each thread releasing the mutex will wake up only the next waiting thread. These tend to be much more complex to code correctly than the above, however. |
@ta0kira: No, but the glibc pthread library uses futexes by default on Linux. But see the end of this post for a serious caveat.
Here's the implementation of fifo_mutex_t I was thinking about. Save this as fifo-mutex.h for example: Code:
#ifndef FIFO_MUTEX_H
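#define FIFO_MUTEX_H
/* A skeleton of what this header would contain, based on the thread;
 * the definitions match the lock/unlock functions sketched earlier,
 * and the names are illustrative, not the original listing. */
#include <pthread.h>

typedef struct fifo_mutex {
    pthread_mutex_t mutex;    /* protects the two counters below */
    pthread_cond_t  wait;     /* broadcast on every unlock */
    unsigned long   next;     /* next ticket number to hand out */
    unsigned long   serving;  /* ticket number allowed to proceed */
} fifo_mutex_t;

#define FIFO_MUTEX_INITIALIZER \
        { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0UL, 0UL }

int fifo_mutex_lock(fifo_mutex_t *);
int fifo_mutex_unlock(fifo_mutex_t *);

#endif /* FIFO_MUTEX_H */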
And here is the test program I used: Code:
#include <pthread.h>

The volatile int working flag is used to synchronize the creation of the threads (and the first printf in each worker, so they complete in the order the threads are created), and thus the order in which they try to lock the fifo_mutex_t lock. As you can see in the worker code, the other printfs in the worker are done in the order the threads obtain the fifo_mutex_t lock. In other words, the printfs should be reliable indicators of the locking order. (The main thread will initially keep the lock locked, to make the race start from the moment it releases the lock.)

Each worker thread will take the mutex LOOPS times, in a tight loop. You can see in the code that the unlock and lock are right next to each other. If the fifo_mutex_t works correctly (and the workers have enough work to do to keep at least one thread blocking on it at all times), the same order should be repeated on each loop.

If you define the preprocessor macro USE_PTHREAD_MUTEX_INSTEAD, for example using -DUSE_PTHREAD_MUTEX_INSTEAD when compiling the test program, then the fifo_mutex_t type is replaced with pthread_mutex_t (and the fifo_ functions with the pthread_ equivalents).

On all of the machines I tried (various Linux and one Solaris 10), my fifo_mutex_t worked correctly: the fifo_mutex_t lock was handed to each thread in its calling order (i.e. FIFO, first in, first out), even when unlocking and locking the fifo_mutex_t in a tight loop.

On the Solaris 10 machine, the pthread_mutex_t lock was always handed over in a scrambled order. This means that you cannot rely on FIFO behaviour for pthread_mutex_t on non-Linux machines at all. On the Linux machines I tested with LOOPS=0, the pthread_mutex_t lock was handed to each thread in the calling order (FIFO). However, if the same thread tries to re-acquire the pthread_mutex_t, i.e. LOOPS>0 in the example program above, the order gets scrambled. This means that you can rely on FIFO order for pthread_mutex_t locks on Linux only the first time a thread acquires that mutex (since the last time the mutex was free, with no threads blocking on it).

I hope you find this useful, |
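The shape of the test program, reconstructed from the description above (the thread count, the handshake details, and the messages are guesses; the USE_PTHREAD_MUTEX_INSTEAD switch is omitted, and the lock/unlock definitions come from the earlier sketch): Code:
#include <stdio.h>
#include <pthread.h>
#include "fifo-mutex.h"            /* the header sketched above */

#define THREADS 4                  /* assumed thread count */
#define LOOPS   10                 /* re-acquisitions per worker */

static fifo_mutex_t lock = FIFO_MUTEX_INITIALIZER;
static volatile int working = 0;   /* worker-creation handshake flag */

static void *worker(void *arg)
{
    const long id = (long)arg;

    printf("Worker %ld created.\n", id);  /* ordered via the handshake */
    working = 1;                          /* let main() create the next one */

    /* main() still holds the lock here, so the workers queue up in
     * creation order; each then re-takes the lock in a tight loop, with
     * the unlock and the next lock right next to each other. */
    for (long i = 0; i <= LOOPS; i++) {   /* LOOPS=0 still locks once */
        fifo_mutex_lock(&lock);
        printf("Worker %ld has the lock.\n", id);
        fifo_mutex_unlock(&lock);
    }
    return NULL;
}

int main(void)
{
    pthread_t thread[THREADS];

    fifo_mutex_lock(&lock);        /* keep workers queued until all exist */

    for (long i = 0; i < THREADS; i++) {
        working = 0;
        pthread_create(&thread[i], NULL, worker, (void *)i);
        while (!working)           /* wait until worker i has announced */
            ;
    }

    fifo_mutex_unlock(&lock);      /* release the lock: the race starts */

    for (int i = 0; i < THREADS; i++)
        pthread_join(thread[i], NULL);

    return 0;
}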
Have you looked at my solution? I'm not saying it's better, but I'd like to hear your comments. I'm sure some combination of our solutions would be ideal.
Kevin Barry |
Code:
#include <stdio.h>
Kevin Barry |
Each thread hitting pthread_cond_wait(cond, mutex) will atomically unlock the mutex and then wait on the condition variable. (The "atomic" just means it is not possible for a condition to slip in between unnoticed, as long as the condition signaler or broadcaster holds that same mutex.) When the condition variable is broadcast, each and every thread waiting on it is woken up, internally automatically re-acquiring the mutex in turn. With futex-based pthread mutexes, that will happen in FIFO order, as long as none of the threads tries to re-acquire the mutex (and there are no other threads blocking on the mutex).

In my implementation, each woken-up thread will check if it is its turn, and if not, go back to waiting on the condition variable, again atomically releasing the mutex. Those threads that were woken up but did not get the mutex before the correct thread will keep on blocking on the mutex until the correct thread releases it, and they get to run. They will eventually get to run, because the scheduler will give each thread some CPU time, eventually. If it is then still not their turn, they will go back to waiting on the condition variable (again, releasing the mutex atomically).

Therefore, there may be any number of threads, with some blocking on the mutex and others waiting on the condition variable. There are no limits as far as I can see. I did run tests with 300 threads (on both 32-bit and 64-bit unicore and multicore CPUs); for more threads I'd need to set a smaller per-thread stack.
It does look interesting, especially the queue approach. Is there a specific feature I should have noticed? I dislike arbitrary limits, though. (But I do think it is possible to change the array to a dynamically grown one if necessary.)

Perhaps a third scheme would work better? Use a queue (a circular buffer?) of mutexes, so that each thread blocks on the mutex belonging to its predecessor, with indices either accessed using atomic builtins or protected by a separate mutex. Each thread will first lock its own (new) mutex. If there is no predecessor mutex, the thread then owns the FIFO lock. Otherwise, the thread will lock the predecessor mutex, blocking on it. When the thread obtains that mutex, it will unlock and discard it. Then it will own the FIFO lock. To release the FIFO lock, the thread will just unlock its own mutex.

At any point, there will be at most one thread blocking on each mutex. The correct thread is thus always woken up when the holding thread releases its mutex. (The locking and unlocking order in the code is critical, to avoid the possibility of deadlock.)

If the mutexes are stored in a circular buffer, the buffer cannot be reallocated while there are mutexes in it (because on Linux, the kernel depends on the address when there is a thread blocked on a futex). A linked list would allow dynamic growth, but to eliminate malloc() overhead, one would have to use allocation pools; thus more complex code.

This third scheme should be able to rely only on mutexes, so it should be usable wherever mutexes are. Condition variables cannot be used in signal handlers, for example. None of the pthread_mutex functions are cancellation points, so this third scheme would have no pthread cancellation points either. (Both our implementations currently have cancellation points, although by default pthread cancellations are deferred, so it should not matter.)

Unlike my implementation, this scheme should not suffer from the thundering herd problem: each mutex is only blocked on by a single thread. The "cost" should also be fixed, not dependent on the number of threads blocking on the structure. I believe it would be more efficient in general than either of our implementations. |
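A minimal sketch of that third scheme, using individually allocated list nodes instead of a circular buffer (the names are illustrative and error handling is omitted; this is not code from this thread): Code:
#include <pthread.h>
#include <stdlib.h>

struct fifo_node {
    pthread_mutex_t   mutex;      /* the successor blocks on this */
    struct fifo_node *prev;       /* our predecessor in the wait chain */
};

struct fifo_lock {
    pthread_mutex_t   tail_lock;  /* protects 'tail' */
    struct fifo_node *tail;       /* most recent waiter; NULL when free */
};
/* e.g. struct fifo_lock lock = { PTHREAD_MUTEX_INITIALIZER, NULL }; */

/* Acquire the FIFO lock; returns our node, needed for fifo_release(). */
struct fifo_node *fifo_acquire(struct fifo_lock *const f)
{
    struct fifo_node *const node = malloc(sizeof *node); /* check omitted */
    pthread_mutex_init(&node->mutex, NULL);
    pthread_mutex_lock(&node->mutex);        /* lock our own mutex first */

    pthread_mutex_lock(&f->tail_lock);
    node->prev = f->tail;                    /* note our predecessor */
    f->tail = node;                          /* we are now last in line */
    pthread_mutex_unlock(&f->tail_lock);

    if (node->prev) {
        pthread_mutex_lock(&node->prev->mutex);   /* block until released */
        pthread_mutex_unlock(&node->prev->mutex); /* then discard it */
        pthread_mutex_destroy(&node->prev->mutex);
        free(node->prev);
    }
    return node;                             /* we now own the FIFO lock */
}

void fifo_release(struct fifo_lock *const f, struct fifo_node *const node)
{
    pthread_mutex_lock(&f->tail_lock);
    const int last = (f->tail == node);      /* nobody waiting after us? */
    if (last)
        f->tail = NULL;                      /* the lock becomes free */
    pthread_mutex_unlock(&f->tail_lock);

    pthread_mutex_unlock(&node->mutex);      /* wakes our successor, if any */
    if (last) {                              /* no successor will free us */
        pthread_mutex_destroy(&node->mutex);
        free(node);
    }
}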
You might as well just add this to the lock/unlock functions in your first solution and call it good: Code:
//first line of function -->

PS: This might be nitpicking, but I'm wondering about idiosyncrasies related to possible integer overflow in the indexing used in your first solution. I haven't used an architecture that doesn't wrap in such cases, and in fact I don't even know of one. |
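In terms of the ticket-lock sketch earlier in the thread, that addition might look like this (illustrative only, not the thread's actual code): Code:
int fifo_mutex_lock(fifo_mutex_t *const f)
{
    int oldstate;

    /* First line of the function: disable cancellation, so that a
     * thread cancelled in pthread_cond_wait() cannot leave the
     * internal mutex locked (and the FIFO mutex deadlocked). */
    pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldstate);

    pthread_mutex_lock(&f->mutex);
    const unsigned long ticket = f->next++;
    while (ticket != f->serving)
        pthread_cond_wait(&f->wait, &f->mutex);
    pthread_mutex_unlock(&f->mutex);

    /* Restore the caller's cancelability before returning. */
    return pthread_setcancelstate(oldstate, NULL);
}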
On the other hand, I've personally never needed thread cancellation at all; my solutions tend to use thread pools that do a bit of work, and then either wait for new work or exit. In some cases it is useful to have many more threads than available CPU cores, in which case a FIFO mutex would likely be heavily contended. The suggested scheme should have all the benefits of my original solution, but not suffer from the "thundering herd" problem (where all threads must be briefly woken up whenever the mutex changes).

On the gripping hand, I really cannot imagine a real need for a FIFO mutex either. I suspect only Windows programmers might find it useful... but I may be wrong.

Quote:
Code:
struct mutex_list {
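    /* Sketch: the rest of the quoted listing is not shown; a minimal
     * node for a chain-of-waiters scheme might hold just: */
    pthread_mutex_t    mutex;   /* each waiter blocks on its predecessor's */
    struct mutex_list *next;    /* next waiter in the chain */
};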
On the other hand, if you use a circular buffer and atomic built-ins to modify the indices, you must either use a power-of-two buffer size (so that the index values can just be ANDed with (size-1) to get the actual indices), or do a load-adjust-compare-swap in a loop using e.g. __sync_bool_compare_and_swap(). It will still be faster than using a mutex.

(I once wrote an atomic addition function for double-precision floating point around __sync_bool_compare_and_swap(), for testing purposes. It has surprisingly little overhead on most x86 CPUs I tested. Measurable, but less than using a mutex.) |
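Such a function might look roughly like this (a sketch of the technique, not the original test code; it assumes GCC's __sync built-ins and that double and uint64_t have the same size): Code:
#include <stdint.h>

/* Atomically add 'value' to '*target' using a load-adjust-compare-swap loop. */
static double atomic_add_double(volatile double *const target, const double value)
{
    union { double d; uint64_t u; } old, new;
    do {
        old.d = *target;            /* load */
        new.d = old.d + value;      /* adjust */
    } while (!__sync_bool_compare_and_swap((volatile uint64_t *)target,
                                           old.u, new.u)); /* swap if unchanged */
    return new.d;
}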
Code:
#include <stdio.h>
Kevin Barry |
The idea is that the allocation "pool" is actually a chain of pools. Whenever an item is allocated, the first pool in the chain with free items is used. If there are no free items in the chain at all, an empty pool is prepended to the list. Whenever an item is deallocated, the pool is also pushed down the chain (unless it is the first or last pool in the chain). That will push sparse pools down the chain, and we should end up with mostly full pools with a few sparse ones.

... but some test coding indicates the above is not worth the effort. It gets pretty complex fast, but is not faster than malloc()/free().

I think it is more efficient to keep a separate linked list to reuse recently released items, with all items individually malloc()ed. When there are no items to reuse, just malloc() a new one. When releasing, just move the item to the separate list. After every N releases, one could free() excess elements in the separate list; note that N is also the maximum number of additional items the separate list could have. The code is very similar to your latest example, except with the reuse/unused list added; very little added complexity.

One could even let N vary based on contention (just add suitable counters to the structure): with low contention, the unused list would be kept small, but larger when there is more contention. I guess that would pretty much eliminate the malloc()/free() overhead, because in most situations the items would be just recycled endlessly, without any malloc()/free() calls. (malloc()/free() would get called only for "bursty" contention, i.e. whenever the contention on the lock changes.) |
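A sketch of that reuse list (a single caller-provided lock is assumed and omitted for brevity; the names and the fixed N are illustrative): Code:
#include <stdlib.h>

#define N 16                       /* trim the unused list every N releases */

struct item {
    struct item *next;
    /* payload goes here, e.g. pthread_mutex_t mutex; */
};

static struct item *unused = NULL; /* recently released items, for reuse */
static size_t       releases = 0;

static struct item *item_get(void)
{
    struct item *const i = unused;
    if (i) {
        unused = i->next;          /* reuse a recently released item */
        return i;
    }
    return malloc(sizeof (struct item));  /* nothing to reuse */
}

static void item_put(struct item *const i)
{
    i->next = unused;              /* just move to the reuse list */
    unused = i;
    if (++releases >= N) {         /* every N releases, free the excess */
        releases = 0;
        while (unused->next) {     /* one simple policy: keep just one */
            struct item *const extra = unused->next;
            unused->next = extra->next;
            free(extra);
        }
    }
}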
Here is an example implementation of what I described in my post above. I'm not sure it is even interesting.
Code:
#ifndef FIFO_MUTEX_H |
You've certainly put a lot of work into something you don't need! Overall it seems like a good design. I wouldn't really call it a FIFO mutex, but rather an access queue, because of the growth and maintenance associated with larger numbers of waiting threads. As such, I'm curious about the possibility of making it a priority queue. That would probably result in non-constant access time due to resorting, though. Your solution is right on one of those lines where it's either a robust version of something primitive (i.e. a fancy mutex) or a primitive version of what's normally more robust (i.e. a primitive queuing system), so it seems natural to want to promote it to the higher domain and bog it down with features.
One portability problem is (pthread_t)-1. pthread_t doesn't have to be an integral type, so this might not compile on all OSes. I know I couldn't compile a cast from pthread_t to unsigned int on FreeBSD (for informational output); however, I don't have FreeBSD available at the moment, and online references appear to show pthread_t as a pointer in FreeBSD. To get rid of all the warnings and errors I ended up with *(unsigned int*) (void*) &the_pthread_t.

It occurred to me earlier that the "cars entering an intersection" analogy for mutex usage is also a good example of when a FIFO mutex would be needed. Whenever I've seen the cars/intersection example, there was at most one car waiting to enter the intersection. If there were one to four cars waiting in real life, however, they should proceed in the order they arrived.

Kevin Barry |
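One portable way around the sentinel is to track ownership with a separate flag and compare thread IDs only via pthread_equal(); a minimal sketch (not code from this thread): Code:
#include <pthread.h>

struct owner {
    pthread_t thread;   /* meaningful only while 'owned' is nonzero */
    int       owned;    /* avoids casting pthread_t to an integer sentinel */
};

/* Nonzero if the calling thread currently owns the lock. */
static int owner_is_self(const struct owner *const o)
{
    return o->owned && pthread_equal(o->thread, pthread_self());
}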