From 64f6702a4c6f2159f2b45aed32b1f03d9b414fcf Mon Sep 17 00:00:00 2001 From: David Joseph Date: Sun, 6 Mar 2011 19:46:13 -0500 Subject: [PATCH] Implemented tournament and mcs barriers. These are the tournament and mcs barriers from "Algorithms for Scalable Synchronization on Shared-Memory Multiprocessors." Validation tests have also been added for these barriers to regressions/ck_barrier/validate. --- include/ck_barrier.h | 92 ++++-- regressions/ck_barrier/validate/Makefile | 8 +- .../validate/ck_barrier_dissemination.c | 42 +-- .../ck_barrier/validate/ck_barrier_mcs.c | 136 +++++++++ .../validate/ck_barrier_tournament.c | 146 ++++++++++ src/ck_barrier.c | 264 ++++++++++++++++-- 6 files changed, 630 insertions(+), 58 deletions(-) create mode 100644 regressions/ck_barrier/validate/ck_barrier_mcs.c create mode 100644 regressions/ck_barrier/validate/ck_barrier_tournament.c diff --git a/include/ck_barrier.h b/include/ck_barrier.h index 6d75a85..74937da 100644 --- a/include/ck_barrier.h +++ b/include/ck_barrier.h @@ -49,13 +49,13 @@ void ck_barrier_centralized(ck_barrier_centralized_t *, unsigned int); struct ck_barrier_combining_group { - unsigned int k; - unsigned int count; - unsigned int sense; struct ck_barrier_combining_group *parent; struct ck_barrier_combining_group *lchild; struct ck_barrier_combining_group *rchild; struct ck_barrier_combining_group *next; + unsigned int k; + unsigned int count; + unsigned int sense; } CK_CC_CACHELINE; typedef struct ck_barrier_combining_group ck_barrier_combining_group_t; @@ -82,30 +82,86 @@ void ck_barrier_combining(ck_barrier_combining_t *, ck_barrier_combining_group_t *, ck_barrier_combining_state_t *); -struct ck_barrier_dissemination_flags { - unsigned int *tflags[2]; - unsigned int **pflags[2]; +struct ck_barrier_dissemination_internal { + unsigned int tflag; + unsigned int *pflag; }; -typedef struct ck_barrier_dissemination_flags ck_barrier_dissemination_flags_t; +typedef struct ck_barrier_dissemination_internal 
ck_barrier_dissemination_internal_t; +struct ck_barrier_dissemination { + struct ck_barrier_dissemination_internal *flags[2]; +}; +typedef struct ck_barrier_dissemination ck_barrier_dissemination_t; struct ck_barrier_dissemination_state { - int parity; - unsigned int sense; + int parity; + unsigned int sense; + unsigned int tid; }; typedef struct ck_barrier_dissemination_state ck_barrier_dissemination_state_t; -#define CK_BARRIER_DISSEMINATION_STATE_INITIALIZER {0, ~0} - -void ck_barrier_dissemination_flags_init(ck_barrier_dissemination_flags_t *, - int); +void ck_barrier_dissemination_init(ck_barrier_dissemination_t *, + ck_barrier_dissemination_internal_t **, + unsigned int); void ck_barrier_dissemination_state_init(ck_barrier_dissemination_state_t *); -int ck_barrier_dissemination_size(unsigned int); +unsigned int ck_barrier_dissemination_size(unsigned int); -void ck_barrier_dissemination(ck_barrier_dissemination_flags_t *, - ck_barrier_dissemination_state_t *, - int, - int); +void ck_barrier_dissemination(ck_barrier_dissemination_t *, + ck_barrier_dissemination_state_t *); + +struct ck_barrier_tournament_round { + enum {BYE, CHAMPION, DROPOUT, LOSER, WINNER} role; + unsigned int *opponent; + unsigned int flag; +}; +typedef struct ck_barrier_tournament_round ck_barrier_tournament_round_t; + +struct ck_barrier_tournament_state { + unsigned int sense; + unsigned int vpid; +}; +typedef struct ck_barrier_tournament_state ck_barrier_tournament_state_t; + +void +ck_barrier_tournament_state_init(ck_barrier_tournament_state_t *); + +void +ck_barrier_tournament_round_init(ck_barrier_tournament_round_t **, + unsigned int); + +unsigned int ck_barrier_tournament_size(unsigned int); + +void +ck_barrier_tournament(ck_barrier_tournament_round_t **, + ck_barrier_tournament_state_t *); + +struct ck_barrier_mcs { + unsigned int *children[2]; + unsigned int childnotready[4]; + unsigned int dummy; + unsigned int havechild[4]; + unsigned int *parent; + unsigned int parentsense; +}; 
+typedef struct ck_barrier_mcs ck_barrier_mcs_t; + +struct ck_barrier_mcs_state { + unsigned int sense; + unsigned int vpid; +}; +typedef struct ck_barrier_mcs_state ck_barrier_mcs_state_t; + +void +ck_barrier_mcs_init(ck_barrier_mcs_t *, + unsigned int); + +void +ck_barrier_mcs_state_init(ck_barrier_mcs_state_t *); + +void +ck_barrier_mcs(ck_barrier_mcs_t *, + ck_barrier_mcs_state_t *); #endif /* _CK_BARRIER_H */ + diff --git a/regressions/ck_barrier/validate/Makefile b/regressions/ck_barrier/validate/Makefile index fe27e3d..7a49792 100644 --- a/regressions/ck_barrier/validate/Makefile +++ b/regressions/ck_barrier/validate/Makefile @@ -1,6 +1,6 @@ .PHONY: clean distribution -OBJECTS=ck_barrier_centralized ck_barrier_combining ck_barrier_dissemination +OBJECTS=ck_barrier_centralized ck_barrier_combining ck_barrier_dissemination ck_barrier_tournament ck_barrier_mcs all: $(OBJECTS) @@ -13,6 +13,12 @@ ck_barrier_combining: ck_barrier_combining.c ../../../include/ck_barrier.h ../.. ck_barrier_dissemination: ck_barrier_dissemination.c ../../../include/ck_barrier.h ../../../src/ck_barrier.c $(CC) $(CFLAGS) -o ck_barrier_dissemination ck_barrier_dissemination.c ../../../src/ck_barrier.c +ck_barrier_tournament: ck_barrier_tournament.c ../../../include/ck_barrier.h ../../../src/ck_barrier.c + $(CC) $(CFLAGS) -o ck_barrier_tournament ck_barrier_tournament.c ../../../src/ck_barrier.c + +ck_barrier_mcs: ck_barrier_mcs.c ../../../include/ck_barrier.h ../../../src/ck_barrier.c + $(CC) $(CFLAGS) -o ck_barrier_mcs ck_barrier_mcs.c ../../../src/ck_barrier.c + clean: rm -rf *.dSYM *~ *.o $(OBJECTS) diff --git a/regressions/ck_barrier/validate/ck_barrier_dissemination.c b/regressions/ck_barrier/validate/ck_barrier_dissemination.c index 1549f67..40bd908 100644 --- a/regressions/ck_barrier/validate/ck_barrier_dissemination.c +++ b/regressions/ck_barrier/validate/ck_barrier_dissemination.c @@ -51,20 +51,18 @@ static struct affinity a; static int nthr; -static int tid; static int 
counters[ENTRIES]; static int barrier_wait; static void * -thread(void *allflags) +thread(void *barrier) { - ck_barrier_dissemination_state_t state = CK_BARRIER_DISSEMINATION_STATE_INITIALIZER; - int j, k, counter, id; + ck_barrier_dissemination_state_t state; + int j, k, counter; int i = 0; aff_iterate(&a); - - id = ck_pr_faa_int(&tid, 1); + ck_barrier_dissemination_state_init(&state); ck_pr_inc_int(&barrier_wait); while (ck_pr_load_int(&barrier_wait) != nthr) @@ -73,7 +71,7 @@ thread(void *allflags) for (j = 0, k = 0; j < ITERATE; j++, k++) { i = j++ & (ENTRIES - 1); ck_pr_inc_int(&counters[i]); - ck_barrier_dissemination(allflags, &state, id, nthr); + ck_barrier_dissemination(barrier, &state); counter = ck_pr_load_int(&counters[i]); if (counter != nthr * (j / ENTRIES + 1)) { fprintf(stderr, "FAILED [%d:%d]: %d != %d\n", i, j - 1, counter, nthr); @@ -87,7 +85,8 @@ thread(void *allflags) int main(int argc, char *argv[]) { - ck_barrier_dissemination_flags_t *allflags; + ck_barrier_dissemination_t *barrier; + ck_barrier_dissemination_internal_t **barrier_internal; pthread_t *threads; int i, size; @@ -110,24 +109,31 @@ main(int argc, char *argv[]) a.delta = atoi(argv[2]); - allflags = malloc(sizeof(ck_barrier_dissemination_flags_t) * nthr); - if (allflags == NULL) { - fprintf(stderr, "ERROR: Could not allocate thread structures\n"); + barrier = malloc(sizeof(ck_barrier_dissemination_t) * nthr); + if (barrier == NULL) { + fprintf(stderr, "ERROR: Could not allocate barrier structures\n"); + exit(EXIT_FAILURE); + } + + barrier_internal = malloc(sizeof(ck_barrier_dissemination_internal_t *) * nthr); + if (barrier_internal == NULL) { + fprintf(stderr, "ERROR: Could not allocate barrier structures\n"); exit(EXIT_FAILURE); } size = ck_barrier_dissemination_size(nthr); - for (i = 0; i < nthr; i++) { - allflags[i].tflags[0] = malloc(sizeof(unsigned int) * size); - allflags[i].tflags[1] = malloc(sizeof(unsigned int) * size); - allflags[i].pflags[0] = malloc(sizeof(unsigned int 
*) * size); - allflags[i].pflags[1] = malloc(sizeof(unsigned int *) * size); + for (i = 0; i < nthr; ++i) { + barrier_internal[i] = malloc(sizeof(ck_barrier_dissemination_internal_t) * size); + if (barrier_internal[i] == NULL) { + fprintf(stderr, "ERROR: Could not allocate barrier structures\n"); + exit(EXIT_FAILURE); + } } - ck_barrier_dissemination_flags_init(allflags, nthr); + ck_barrier_dissemination_init(barrier, barrier_internal, nthr); fprintf(stderr, "Creating threads (barrier)..."); for (i = 0; i < nthr; i++) { - if (pthread_create(&threads[i], NULL, thread, allflags)) { + if (pthread_create(&threads[i], NULL, thread, barrier)) { fprintf(stderr, "ERROR: Could not create thread %d\n", i); exit(EXIT_FAILURE); } diff --git a/regressions/ck_barrier/validate/ck_barrier_mcs.c b/regressions/ck_barrier/validate/ck_barrier_mcs.c new file mode 100644 index 0000000..980de56 --- /dev/null +++ b/regressions/ck_barrier/validate/ck_barrier_mcs.c @@ -0,0 +1,136 @@ +/* + * Copyright 2011 Samy Al Bahra. + * Copyright 2011 David Joseph. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "../../common.h" + +#ifndef ITERATE +#define ITERATE 5000000 +#endif + +#ifndef ENTRIES +#define ENTRIES 512 +#endif + +static struct affinity a; +static int nthr; +static int counters[ENTRIES]; +static int barrier_wait; + +static void * +thread(void *barrier) +{ + ck_barrier_mcs_state_t state; + int j, counter; + int i = 0; + + aff_iterate(&a); + + ck_barrier_mcs_state_init(&state); + + ck_pr_inc_int(&barrier_wait); + while (ck_pr_load_int(&barrier_wait) != nthr) + ck_pr_stall(); + + for (j = 0; j < ITERATE; j++) { + i = j++ & (ENTRIES - 1); + ck_pr_inc_int(&counters[i]); + ck_barrier_mcs(barrier, &state); + counter = ck_pr_load_int(&counters[i]); + if (counter != nthr * (j / ENTRIES + 1)) { + fprintf(stderr, "FAILED [%d:%d]: %d != %d\n", i, j - 1, counter, nthr); + exit(EXIT_FAILURE); + } + } + + return (NULL); +} + +int +main(int argc, char *argv[]) +{ + pthread_t *threads; + ck_barrier_mcs_t *barrier; + int i; + + if (argc != 3) { + fprintf(stderr, "Usage: correct \n"); + exit(EXIT_FAILURE); + } + + nthr = atoi(argv[1]); + if (nthr <= 0) { + fprintf(stderr, "ERROR: Number of threads must be greater than 0\n"); + exit(EXIT_FAILURE); + } + + threads = malloc(sizeof(pthread_t) * nthr); + if (threads == NULL) { + fprintf(stderr, "ERROR: Could not allocate thread structures\n"); + exit(EXIT_FAILURE); + } 
+ + barrier = malloc(sizeof(ck_barrier_mcs_t) * nthr); + if (barrier == NULL) { + fprintf(stderr, "ERROR: Could not allocate barrier structures\n"); + exit(EXIT_FAILURE); + } + ck_barrier_mcs_init(barrier, nthr); + + a.delta = atoi(argv[2]); + + fprintf(stderr, "Creating threads (barrier)..."); + for (i = 0; i < nthr; i++) { + if (pthread_create(&threads[i], NULL, thread, barrier)) { + fprintf(stderr, "ERROR: Could not create thread %d\n", i); + exit(EXIT_FAILURE); + } + } + fprintf(stderr, "done\n"); + + fprintf(stderr, "Waiting for threads to finish correctness regression..."); + for (i = 0; i < nthr; i++) + pthread_join(threads[i], NULL); + fprintf(stderr, "done (passed)\n"); + + + return (0); +} + diff --git a/regressions/ck_barrier/validate/ck_barrier_tournament.c b/regressions/ck_barrier/validate/ck_barrier_tournament.c new file mode 100644 index 0000000..7fdfc15 --- /dev/null +++ b/regressions/ck_barrier/validate/ck_barrier_tournament.c @@ -0,0 +1,146 @@ +/* + * Copyright 2011 Samy Al Bahra. + * Copyright 2011 David Joseph. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "../../common.h" + +#ifndef ITERATE +#define ITERATE 5000000 +#endif + +#ifndef ENTRIES +#define ENTRIES 512 +#endif + +static struct affinity a; +static int nthr; +static int counters[ENTRIES]; +static int barrier_wait; + +static void * +thread(void *rounds) +{ + ck_barrier_tournament_state_t state; + int j, counter; + int i = 0; + + aff_iterate(&a); + + ck_barrier_tournament_state_init(&state); + + ck_pr_inc_int(&barrier_wait); + while (ck_pr_load_int(&barrier_wait) != nthr) + ck_pr_stall(); + + for (j = 0; j < ITERATE; j++) { + i = j++ & (ENTRIES - 1); + ck_pr_inc_int(&counters[i]); + ck_barrier_tournament(rounds, &state); + counter = ck_pr_load_int(&counters[i]); + if (counter != nthr * (j / ENTRIES + 1)) { + fprintf(stderr, "FAILED [%d:%d]: %d != %d\n", i, j - 1, counter, nthr); + exit(EXIT_FAILURE); + } + } + + return (NULL); +} + +int +main(int argc, char *argv[]) +{ + pthread_t *threads; + ck_barrier_tournament_round_t **rounds; + int i; + unsigned int size; + + if (argc != 3) { + fprintf(stderr, "Usage: correct \n"); + exit(EXIT_FAILURE); + } + + nthr = atoi(argv[1]); + if (nthr <= 0) { + fprintf(stderr, "ERROR: Number of threads must be greater than 0\n"); + exit(EXIT_FAILURE); + } + + threads = malloc(sizeof(pthread_t) * nthr); + if (threads == NULL) { + fprintf(stderr, "ERROR: Could not 
allocate thread structures\n"); + exit(EXIT_FAILURE); + } + + rounds = malloc(sizeof(ck_barrier_tournament_round_t *) * nthr); + if (rounds == NULL) { + fprintf(stderr, "ERROR: Could not allocate barrier structures\n"); + exit(EXIT_FAILURE); + } + + size = ck_barrier_tournament_size(nthr); + for (i = 0; i < nthr; ++i) { + rounds[i] = malloc(sizeof(ck_barrier_tournament_round_t) * size); + if (rounds[i] == NULL) { + fprintf(stderr, "ERROR: Could not allocate barrier structures\n"); + exit(EXIT_FAILURE); + } + } + ck_barrier_tournament_round_init(rounds, nthr); + + a.delta = atoi(argv[2]); + + fprintf(stderr, "Creating threads (barrier)..."); + for (i = 0; i < nthr; i++) { + if (pthread_create(&threads[i], NULL, thread, rounds)) { + fprintf(stderr, "ERROR: Could not create thread %d\n", i); + exit(EXIT_FAILURE); + } + } + fprintf(stderr, "done\n"); + + fprintf(stderr, "Waiting for threads to finish correctness regression..."); + for (i = 0; i < nthr; i++) + pthread_join(threads[i], NULL); + fprintf(stderr, "done (passed)\n"); + + + return (0); +} + diff --git a/src/ck_barrier.c b/src/ck_barrier.c index b309d80..b119a3a 100644 --- a/src/ck_barrier.c +++ b/src/ck_barrier.c @@ -32,6 +32,11 @@ #include +struct ck_barrier_combining_queue { + struct ck_barrier_combining_group *head; + struct ck_barrier_combining_group *tail; +}; + /* * Log and power_2 algorithms from: http://graphics.stanford.edu/~seander/bithacks.html */ @@ -54,6 +59,7 @@ ck_barrier_internal_log(unsigned int v) CK_CC_INLINE static unsigned int ck_barrier_internal_power_2(unsigned int v) { + --v; v |= v >> 1; v |= v >> 2; @@ -65,11 +71,6 @@ ck_barrier_internal_power_2(unsigned int v) return (v); } -struct ck_barrier_combining_queue { - struct ck_barrier_combining_group *head; - struct ck_barrier_combining_group *tail; -}; - void ck_barrier_centralized(struct ck_barrier_centralized *barrier, struct ck_barrier_centralized_state *state, @@ -95,6 +96,7 @@ CK_CC_INLINE static void 
ck_barrier_combining_queue_enqueue(struct ck_barrier_combining_queue *queue, struct ck_barrier_combining_group *node_value) { + node_value->next = NULL; if (queue->head == NULL) { @@ -126,6 +128,7 @@ ck_barrier_combining_try_insert(struct ck_barrier_combining_group *parent, struct ck_barrier_combining_group *tnode, struct ck_barrier_combining_group **child) { + if (*child == NULL) { *child = tnode; tnode->parent = parent; @@ -142,6 +145,7 @@ ck_barrier_combining_aux(struct ck_barrier_combining *barrier, struct ck_barrier_combining_group *tnode, unsigned int sense) { + if (ck_pr_faa_uint(&tnode->count, 1) == tnode->k - 1) { if (tnode->parent != NULL) ck_barrier_combining_aux(barrier, tnode->parent, sense); @@ -195,6 +199,7 @@ void ck_barrier_combining_init(struct ck_barrier_combining *root, struct ck_barrier_combining_group *init_root) { + init_root->k = 0; init_root->count = 0; init_root->sense = 0; @@ -209,18 +214,30 @@ ck_barrier_combining(struct ck_barrier_combining *barrier, struct ck_barrier_combining_group *tnode, struct ck_barrier_combining_state *state) { + ck_barrier_combining_aux(barrier, tnode, state->sense); state->sense = ~state->sense; return; } +static unsigned int ck_barrier_dissemination_nthr; +static unsigned int ck_barrier_dissemination_tid; + void -ck_barrier_dissemination_flags_init(struct ck_barrier_dissemination_flags *allflags, - int nthr) +ck_barrier_dissemination_init(struct ck_barrier_dissemination *barrier, + struct ck_barrier_dissemination_internal **barrier_internal, + unsigned int nthr) { - int i, j, k, size, offset; + unsigned int i, j, k, size, offset; + ck_barrier_dissemination_nthr = nthr; size = (ck_barrier_internal_log(ck_barrier_internal_power_2(nthr))); + + for (i = 0; i < nthr; ++i) { + barrier[i].flags[0] = barrier_internal[i]; + barrier[i].flags[1] = barrier_internal[i] + size; + } + for (i = 0; i < nthr; ++i) { for (k = 0, offset = 1; k < size; ++k, offset <<= 1) { /* Determine the thread's partner, j, for the current 
round. */ @@ -230,10 +247,11 @@ ck_barrier_dissemination_flags_init(struct ck_barrier_dissemination_flags *allfl j = (i + offset) % nthr; /* Set the thread's partner for round k. */ - allflags[i].pflags[0][k] = &allflags[j].tflags[0][k]; - allflags[i].pflags[1][k] = &allflags[j].tflags[1][k]; + barrier[i].flags[0][k].pflag = &barrier[j].flags[0][k].tflag; + barrier[i].flags[1][k].pflag = &barrier[j].flags[1][k].tflag; + /* Set the thread's flags to false. */ - allflags[i].tflags[0][k] = allflags[i].tflags[1][k] = 0; + barrier[i].flags[0][k].tflag = barrier[i].flags[1][k].tflag = 0; } } @@ -243,32 +261,33 @@ ck_barrier_dissemination_flags_init(struct ck_barrier_dissemination_flags *allfl void ck_barrier_dissemination_state_init(struct ck_barrier_dissemination_state *state) { + state->parity = 0; state->sense = ~0; + state->tid = ck_pr_faa_uint(&ck_barrier_dissemination_tid, 1); return; } -int +unsigned int ck_barrier_dissemination_size(unsigned int nthr) { - return (ck_barrier_internal_log(ck_barrier_internal_power_2(nthr))); + + return (ck_barrier_internal_log(ck_barrier_internal_power_2(nthr)) * 2); } void -ck_barrier_dissemination(struct ck_barrier_dissemination_flags *allflags, - struct ck_barrier_dissemination_state *state, - int tid, - int nthr) +ck_barrier_dissemination(struct ck_barrier_dissemination *barrier, + struct ck_barrier_dissemination_state *state) { - int i, size; + unsigned int i, size; - size = (ck_barrier_internal_log(ck_barrier_internal_power_2(nthr))); + size = (ck_barrier_internal_log(ck_barrier_internal_power_2(ck_barrier_dissemination_nthr))); for (i = 0; i < size; ++i) { /* Unblock current partner. */ - ck_pr_store_uint(allflags[tid].pflags[state->parity][i], state->sense); + ck_pr_store_uint(barrier[state->tid].flags[state->parity][i].pflag, state->sense); /* Wait until some other thread unblocks this one. 
*/ - while (ck_pr_load_uint(&allflags[tid].tflags[state->parity][i]) != state->sense) + while (ck_pr_load_uint(&barrier[state->tid].flags[state->parity][i].tflag) != state->sense) ck_pr_stall(); } @@ -285,3 +304,206 @@ ck_barrier_dissemination(struct ck_barrier_dissemination_flags *allflags, return; } +static unsigned int ck_barrier_tournament_tid; + +void +ck_barrier_tournament_state_init(ck_barrier_tournament_state_t *state) +{ + + state->sense = ~0; + state->vpid = ck_pr_faa_uint(&ck_barrier_tournament_tid, 1); + return; +} + +void +ck_barrier_tournament_round_init(struct ck_barrier_tournament_round **rounds, + unsigned int nthr) +{ + unsigned int i, k, size, twok, twokm1, imod2k; + + size = ck_barrier_tournament_size(nthr); + for (i = 0; i < nthr; ++i) { + /* + * By initializing this outside of the inner loop, we can avoid + * checking k > 0 for every iteration. + */ + rounds[i][0].flag = 0; + rounds[i][0].role = DROPOUT; + for (k = 1, twok = 2, twokm1 = 1; k < size; ++k, twokm1 = twok, twok <<= 1) { + rounds[i][k].flag = 0; + + imod2k = i & (twok - 1); + if (imod2k == 0) { + if ((i + twokm1 < nthr) && (twok < nthr)) + rounds[i][k].role = WINNER; + else if (i + twokm1 >= nthr) + rounds[i][k].role = BYE; + } + if (imod2k == twokm1) + rounds[i][k].role = LOSER; + else if ((i == 0) && (twok >= nthr)) + rounds[i][k].role = CHAMPION; + + if (rounds[i][k].role == LOSER) + rounds[i][k].opponent = &rounds[i - twokm1][k].flag; + else if (rounds[i][k].role == WINNER || rounds[i][k].role == CHAMPION) + rounds[i][k].opponent = &rounds[i + twokm1][k].flag; + } + } + + return; +} + +unsigned int +ck_barrier_tournament_size(unsigned int nthr) +{ + + return (ck_barrier_internal_log(ck_barrier_internal_power_2(nthr)) + 1); +} + +void +ck_barrier_tournament(struct ck_barrier_tournament_round **rounds, + struct ck_barrier_tournament_state *state) +{ + int round = 1; + + for (;; ++round) { + switch (rounds[state->vpid][round].role) { // MIGHT NEED TO USE CK_PR_LOAD + case BYE: + 
break; + case CHAMPION: + while (ck_pr_load_uint(&rounds[state->vpid][round].flag) != state->sense) + ck_pr_stall(); + ck_pr_store_uint(rounds[state->vpid][round].opponent, state->sense); + goto wakeup; + break; + case DROPOUT: + /* NOTREACHED */ + break; + case LOSER: + ck_pr_store_uint(rounds[state->vpid][round].opponent, state->sense); + while (ck_pr_load_uint(&rounds[state->vpid][round].flag) != state->sense) + ck_pr_stall(); + goto wakeup; + break; + case WINNER: + while (ck_pr_load_uint(&rounds[state->vpid][round].flag) != state->sense) + ck_pr_stall(); + break; + } + } +wakeup: + for (round -= 1;; --round) { + switch (rounds[state->vpid][round].role) { // MIGHT NEED TO USE CK_PR_LOAD + case BYE: + break; + case CHAMPION: + /* NOTREACHED */ + break; + case DROPOUT: + goto leave; + break; + case LOSER: + /* NOTREACHED */ + break; + case WINNER: + ck_pr_store_uint(rounds[state->vpid][round].opponent, state->sense); + break; + } + } + +leave: + state->sense = ~state->sense; + return; +} + +static unsigned int ck_barrier_mcs_tid; + +void +ck_barrier_mcs_init(struct ck_barrier_mcs *barrier, + unsigned int nthr) +{ + unsigned int i, j; + + for (i = 0; i < nthr; ++i) { + for (j = 0; j < 4; ++j) { + barrier[i].havechild[j] = ((i << 2) + j < nthr - 1) ? + ~0 : + 0; + barrier[i].childnotready[j] = barrier[i].havechild[j]; + } + + barrier[i].parent = (i == 0) ? + &barrier[i].dummy : + &barrier[(i - 1) >> 2].childnotready[(i - 1) & 3]; + + barrier[i].children[0] = ((i << 1) + 1 >= nthr) ? + &barrier[i].dummy : + &barrier[(i << 1) + 1].parentsense; + + barrier[i].children[1] = ((i << 1) + 2 >= nthr) ? 
+ &barrier[i].dummy : + &barrier[(i << 1) + 2].parentsense; + + barrier[i].parentsense = 0; + } + + return; +} + +void +ck_barrier_mcs_state_init(struct ck_barrier_mcs_state *state) +{ + + state->sense = ~0; + state->vpid = ck_pr_faa_uint(&ck_barrier_mcs_tid, 1); + return; +} + +CK_CC_INLINE static bool +ck_barrier_mcs_check_children(unsigned int *childnotready) +{ + int i; + + for (i = 0; i < 4; ++i) { + if (ck_pr_load_uint(&childnotready[i]) != 0) + return (false); + } + + return (true); +} + +CK_CC_INLINE static void +ck_barrier_mcs_reinitialize_children(struct ck_barrier_mcs *node) +{ + int i; + + for (i = 0; i < 4; ++i) + ck_pr_store_uint(&node->childnotready[i], node->havechild[i]); + + return; +} + +void +ck_barrier_mcs(struct ck_barrier_mcs *barrier, + struct ck_barrier_mcs_state *state) +{ + + while (ck_barrier_mcs_check_children(barrier[state->vpid].childnotready) == false) + ck_pr_stall(); + ck_barrier_mcs_reinitialize_children(&barrier[state->vpid]); + + ck_pr_store_uint(barrier[state->vpid].parent, 0); + + if (state->vpid != 0) { + while (ck_pr_load_uint(&barrier[state->vpid].parentsense) != state->sense) + ck_pr_stall(); + } + + ck_pr_store_uint(barrier[state->vpid].children[0], state->sense); + ck_pr_store_uint(barrier[state->vpid].children[1], state->sense); + state->sense = ~state->sense; + + return; +} +