The software combining tree barrier now supports an arbitrary number of threads per group.

Changed some structure names for ck_barrier_combining.
Modified unit tests to account for groups of threads.
ck_pring
David Joseph 14 years ago
parent 3b63432d03
commit 5e7073b9b7

@ -75,17 +75,17 @@ ck_barrier_centralized(struct ck_barrier_centralized *barrier,
#ifndef CK_F_BARRIER_COMBINING
#define CK_F_BARRIER_COMBINING
/*
 * Node of the combining barrier's tree. One node represents one group of
 * threads (or the root installed by ck_barrier_combining_init).
 *
 * NOTE(review): this text is a rendered diff, so each renamed line appears
 * twice -- presumably the `_entry` spelling is the removed side and the
 * `_group` spelling is the added side. Confirm against the real header.
 */
struct ck_barrier_combining_entry {
struct ck_barrier_combining_group {
/*
 * Set to 0 for the root by ck_barrier_combining_init and to the group's
 * thread count (nthr) by ck_barrier_combining_group_init.
 */
unsigned int k;
/*
 * Both zeroed by ck_barrier_combining_group_init. Presumably the arrival
 * counter and the node's current barrier phase -- confirm against
 * ck_barrier_combining_aux, whose body is not visible here.
 */
unsigned int count;
unsigned int sense;
/*
 * Tree links. lchild and rchild are reset to NULL by
 * ck_barrier_combining_group_init before the node is inserted.
 */
struct ck_barrier_combining_entry *parent;
struct ck_barrier_combining_entry *lchild;
struct ck_barrier_combining_entry *rchild;
/* Queue link; ck_barrier_combining_queue_enqueue clears it to NULL. */
struct ck_barrier_combining_entry *next;
struct ck_barrier_combining_group *parent;
struct ck_barrier_combining_group *lchild;
struct ck_barrier_combining_group *rchild;
struct ck_barrier_combining_group *next;
};
typedef struct ck_barrier_combining_entry ck_barrier_combining_entry_t;
typedef struct ck_barrier_combining_group ck_barrier_combining_group_t;
struct ck_barrier_combining_state {
unsigned int sense;
@ -96,15 +96,15 @@ typedef struct ck_barrier_combining_state ck_barrier_combining_state_t;
#define CK_BARRIER_COMBINING_STATE_INITIALIZER {~0}
/*
 * Top-level handle for a combining barrier; this is the object passed as the
 * first argument to the ck_barrier_combining_* functions.
 *
 * NOTE(review): rendered diff -- the two `root` lines are presumably the
 * removed (`_entry`) and added (`_group`) sides of a single member.
 */
struct ck_barrier_combining {
/* Root node of the tree of groups. */
struct ck_barrier_combining_entry *root;
struct ck_barrier_combining_group *root;
/*
 * Spinlock stored with the barrier. Where it is taken is not visible in this
 * excerpt; presumably it serializes tree insertion -- TODO confirm.
 */
ck_spinlock_fas_t mutex;
};
typedef struct ck_barrier_combining ck_barrier_combining_t;
/*
 * Helper queue of tree nodes, used as a local in
 * ck_barrier_combining_group_init. Nodes are chained through their `next`
 * member; dequeue takes its result from `head`.
 *
 * NOTE(review): rendered diff -- each member appears once with the old
 * `_entry` type and once with the new `_group` type.
 */
struct ck_barrier_combining_queue {
struct ck_barrier_combining_entry *head;
struct ck_barrier_combining_entry *tail;
struct ck_barrier_combining_group *head;
struct ck_barrier_combining_group *tail;
};
CK_CC_INLINE static void
@ -116,7 +116,7 @@ ck_barrier_combining_queue_init(struct ck_barrier_combining_queue *queue)
CK_CC_INLINE static void
ck_barrier_combining_queue_enqueue(struct ck_barrier_combining_queue *queue,
struct ck_barrier_combining_entry *node_value)
struct ck_barrier_combining_group *node_value)
{
node_value->next = NULL;
@ -132,10 +132,10 @@ ck_barrier_combining_queue_enqueue(struct ck_barrier_combining_queue *queue,
return;
}
CK_CC_INLINE static struct ck_barrier_combining_entry *
CK_CC_INLINE static struct ck_barrier_combining_group *
ck_barrier_combining_queue_dequeue(struct ck_barrier_combining_queue *queue)
{
struct ck_barrier_combining_entry *front = NULL;
struct ck_barrier_combining_group *front = NULL;
if (queue->head != NULL) {
front = queue->head;
@ -147,7 +147,7 @@ ck_barrier_combining_queue_dequeue(struct ck_barrier_combining_queue *queue)
CK_CC_INLINE static void
ck_barrier_combining_init(struct ck_barrier_combining *root,
struct ck_barrier_combining_entry *init_root)
struct ck_barrier_combining_group *init_root)
{
init_root->k = 0;
@ -160,9 +160,9 @@ ck_barrier_combining_init(struct ck_barrier_combining *root,
}
CK_CC_INLINE static bool
ck_barrier_combining_try_insert(struct ck_barrier_combining_entry *parent,
struct ck_barrier_combining_entry *tnode,
struct ck_barrier_combining_entry **child)
ck_barrier_combining_try_insert(struct ck_barrier_combining_group *parent,
struct ck_barrier_combining_group *tnode,
struct ck_barrier_combining_group **child)
{
if (*child == NULL) {
@ -177,15 +177,16 @@ ck_barrier_combining_try_insert(struct ck_barrier_combining_entry *parent,
}
CK_CC_INLINE static void
ck_barrier_combining_entry_init(struct ck_barrier_combining *root,
struct ck_barrier_combining_entry *tnode)
ck_barrier_combining_group_init(struct ck_barrier_combining *root,
struct ck_barrier_combining_group *tnode,
unsigned int nthr)
{
struct ck_barrier_combining_entry *node;
struct ck_barrier_combining_group *node;
struct ck_barrier_combining_queue queue;
ck_barrier_combining_queue_init(&queue);
tnode->k = 1;
tnode->k = nthr;
tnode->count = 0;
tnode->sense = 0;
tnode->lchild = tnode->rchild = NULL;
@ -212,7 +213,7 @@ leave:
CK_CC_INLINE static void
ck_barrier_combining_aux(struct ck_barrier_combining *barrier,
struct ck_barrier_combining_entry *tnode,
struct ck_barrier_combining_group *tnode,
unsigned int sense)
{
@ -239,11 +240,13 @@ ck_barrier_combining_aux(struct ck_barrier_combining *barrier,
/*
 * Runs one barrier round for the calling thread: passes the caller's current
 * sense value to ck_barrier_combining_aux, then bitwise-inverts state->sense
 * so the next call uses the opposite value.
 *
 * barrier: barrier handle, forwarded unchanged to ck_barrier_combining_aux.
 * tnode:   the caller's node in the tree, forwarded unchanged.
 * state:   per-caller state; only its sense member is read and written here.
 *
 * NOTE(review): rendered diff -- the two `tnode` parameter lines are
 * presumably the removed (`_entry`) and added (`_group`) sides of one
 * parameter.
 */
CK_CC_INLINE static void
ck_barrier_combining(struct ck_barrier_combining *barrier,
struct ck_barrier_combining_entry *tnode,
struct ck_barrier_combining_group *tnode,
struct ck_barrier_combining_state *state)
{
ck_barrier_combining_aux(barrier, tnode, state->sense);
/* Flip the local sense for the next round. */
state->sense = ~state->sense;
return;
}
#endif /* CK_F_BARRIER_COMBINING */

@ -58,9 +58,8 @@ static void *
thread(void *null CK_CC_UNUSED)
{
ck_barrier_centralized_state_t state = CK_BARRIER_CENTRALIZED_STATE_INITIALIZER;
int j;
int j, counter;
int i = 0;
int counter;
aff_iterate(&a);

@ -51,40 +51,31 @@
static struct affinity a;
static int nthr;
static int ngroups;
static int counters[ENTRIES];
static ck_barrier_combining_t barrier;
static int barrier_wait;
static void *
thread(void *null CK_CC_UNUSED)
thread(void *group)
{
ck_barrier_combining_entry_t *tnode;
ck_barrier_combining_state_t state = CK_BARRIER_COMBINING_STATE_INITIALIZER;
int j;
int j, counter;
int i = 0;
int counter;
aff_iterate(&a);
tnode = malloc(sizeof(ck_barrier_combining_entry_t));
if (tnode == NULL) {
fprintf(stderr, "Could not allocate thread barrier entry\n");
exit(EXIT_FAILURE);
}
ck_barrier_combining_entry_init(&barrier, tnode);
ck_pr_inc_int(&barrier_wait);
while (ck_pr_load_int(&barrier_wait) != nthr)
while (ck_pr_load_int(&barrier_wait) != (nthr * ngroups))
ck_pr_stall();
for (j = 0; j < ITERATE; j++) {
i = j++ & (ENTRIES - 1);
ck_pr_inc_int(&counters[i]);
ck_barrier_combining(&barrier, tnode, &state);
ck_barrier_combining(&barrier, group, &state);
counter = ck_pr_load_int(&counters[i]);
if (counter != nthr * (j / ENTRIES + 1)) {
fprintf(stderr, "FAILED [%d:%d]: %d != %d\n", i, j - 1, counter, nthr);
if (counter != nthr * ngroups * (j / ENTRIES + 1)) {
fprintf(stderr, "FAILED [%d:%d]: %d != %d\n", i, j - 1, counter, nthr * ngroups);
exit(EXIT_FAILURE);
}
}
@ -96,38 +87,55 @@ int
main(int argc, char *argv[])
{
pthread_t *threads;
ck_barrier_combining_entry_t *init_root;
ck_barrier_combining_group_t *groupings;
ck_barrier_combining_group_t *init_root;
int i;
init_root = malloc(sizeof(ck_barrier_combining_entry_t));
init_root = malloc(sizeof(ck_barrier_combining_group_t));
if (init_root == NULL) {
fprintf(stderr, "ERROR: Could not allocate initial barrier structure\n");
exit(EXIT_FAILURE);
}
ck_barrier_combining_init(&barrier, init_root);
if (argc != 3) {
fprintf(stderr, "Usage: correct <number of threads> <affinity delta>\n");
if (argc != 4) {
fprintf(stderr, "Usage: correct <total groups> <threads per group> <affinity delta>\n");
exit(EXIT_FAILURE);
}
ngroups = atoi(argv[1]);
if (ngroups <= 0) {
fprintf(stderr, "ERROR: Number of groups must be greater than 0\n");
exit(EXIT_FAILURE);
}
nthr = atoi(argv[1]);
nthr = atoi(argv[2]);
if (nthr <= 0) {
fprintf(stderr, "ERROR: Number of threads must be greater than 0\n");
exit(EXIT_FAILURE);
}
threads = malloc(sizeof(pthread_t) * nthr);
groupings = malloc(sizeof(ck_barrier_combining_group_t) * ngroups);
if (groupings == NULL) {
fprintf(stderr, "Could not allocate thread barrier grouping structures\n");
exit(EXIT_FAILURE);
}
threads = malloc(sizeof(pthread_t) * nthr * ngroups);
if (threads == NULL) {
fprintf(stderr, "ERROR: Could not allocate thread structures\n");
exit(EXIT_FAILURE);
}
a.delta = atoi(argv[2]);
a.delta = atoi(argv[3]);
for (i = 0; i < ngroups; i++) {
ck_barrier_combining_group_init(&barrier, groupings + i, nthr);
}
fprintf(stderr, "Creating threads (barrier)...");
for (i = 0; i < nthr; i++) {
if (pthread_create(&threads[i], NULL, thread, NULL)) {
for (i = 0; i < (nthr * ngroups); i++) {
if (pthread_create(&threads[i], NULL, thread, groupings + (i % ngroups))) {
fprintf(stderr, "ERROR: Could not create thread %d\n", i);
exit(EXIT_FAILURE);
}
@ -135,7 +143,7 @@ main(int argc, char *argv[])
fprintf(stderr, "done\n");
fprintf(stderr, "Waiting for threads to finish correctness regression...");
for (i = 0; i < nthr; i++)
for (i = 0; i < (nthr * ngroups); i++)
pthread_join(threads[i], NULL);
fprintf(stderr, "done (passed)\n");

Loading…
Cancel
Save