diff --git a/include/ck_spinlock.h b/include/ck_spinlock.h
index 269d727..abd2a54 100644
--- a/include/ck_spinlock.h
+++ b/include/ck_spinlock.h
@@ -825,6 +825,107 @@ ck_spinlock_mcs_unlock(struct ck_spinlock_mcs **queue, struct ck_spinlock_mcs *node)
 }
 #endif /* CK_F_SPINLOCK_MCS */
 
+#ifndef CK_F_SPINLOCK_HCLH
+#define CK_F_SPINLOCK_HCLH
+struct ck_spinlock_hclh {
+	unsigned int wait;
+	unsigned int splice;
+	int cluster_id;
+	struct ck_spinlock_hclh *previous;
+};
+typedef struct ck_spinlock_hclh ck_spinlock_hclh_t;
+
+CK_CC_INLINE static void
+ck_spinlock_hclh_init(struct ck_spinlock_hclh **lock,
+    struct ck_spinlock_hclh *unowned,
+    int cluster_id)
+{
+
+	ck_pr_store_ptr(&unowned->previous, NULL);
+	ck_pr_store_uint(&unowned->wait, false);
+	ck_pr_store_uint(&unowned->splice, false);
+	ck_pr_store_int(&unowned->cluster_id, cluster_id);
+	ck_pr_store_ptr(lock, unowned);
+	ck_pr_fence_store();
+	return;
+}
+
+CK_CC_INLINE static bool
+ck_spinlock_hclh_locked(struct ck_spinlock_hclh **queue)
+{
+	struct ck_spinlock_hclh *head;
+
+	ck_pr_fence_load();
+	head = ck_pr_load_ptr(queue);
+	ck_pr_fence_load();
+	return ck_pr_load_uint(&head->wait);
+}
+
+CK_CC_INLINE static void
+ck_spinlock_hclh_lock(struct ck_spinlock_hclh **glob_queue,
+    struct ck_spinlock_hclh **local_queue,
+    struct ck_spinlock_hclh *thread)
+{
+	struct ck_spinlock_hclh *previous, *local_tail;
+
+	/* Indicate to the next thread on queue that they will have to block. */
+	ck_pr_store_uint(&thread->wait, true);
+	ck_pr_store_uint(&thread->splice, false);
+	thread->cluster_id = (*local_queue)->cluster_id;
+	ck_pr_fence_store();
+
+	/* Mark current request as last request. Save reference to previous request. */
+	previous = ck_pr_fas_ptr(local_queue, thread);
+	thread->previous = previous;
+
+	/* Wait until previous thread from the local queue is done with lock. */
+	ck_pr_fence_load();
+	if (previous->previous != NULL &&
+	    previous->cluster_id == thread->cluster_id) {
+		while (ck_pr_load_uint(&previous->wait) == true)
+			ck_pr_stall();
+
+		/* We're head of the global queue, we're done. */
+		if (!(ck_pr_load_uint(&previous->splice)))
+			return;
+	}
+
+	/* Now we need to splice the local queue into the global queue. */
+	local_tail = ck_pr_load_ptr(local_queue);
+	ck_pr_fence_load();
+	previous = ck_pr_fas_ptr(glob_queue, local_tail);
+	ck_pr_store_uint(&local_tail->splice, true);
+	ck_pr_fence_store();
+
+	/* Wait until previous thread from the global queue is done with lock. */
+	while (ck_pr_load_uint(&previous->wait) == true)
+		ck_pr_stall();
+
+	return;
+}
+
+CK_CC_INLINE static void
+ck_spinlock_hclh_unlock(struct ck_spinlock_hclh **thread)
+{
+	struct ck_spinlock_hclh *previous;
+
+	/*
+	 * If there are waiters, they are spinning on the current node's wait
+	 * flag. The flag is cleared so that the successor may complete an
+	 * acquisition. If the caller is pre-empted then the predecessor field
+	 * may be updated by a successor's lock operation. In order to avoid
+	 * this, save a copy of the predecessor before setting the flag.
+	 */
+	previous = thread[0]->previous;
+
+	/* We have to pay this cost anyways, use it as a compiler barrier too. */
+	ck_pr_fence_memory();
+	ck_pr_store_uint(&(*thread)->wait, false);
+
+	/*
+	 * Predecessor is guaranteed not to be spinning on previous request,
+	 * so update caller to use previous structure. This allows successor
+	 * all the time in the world to successfully read updated wait flag.
+	 */
+	*thread = previous;
+	return;
+}
+#endif /* CK_F_SPINLOCK_HCLH */
+
 #ifndef CK_F_SPINLOCK_CLH
 #define CK_F_SPINLOCK_CLH
diff --git a/regressions/ck_spinlock/benchmark/Makefile b/regressions/ck_spinlock/benchmark/Makefile
index 1afeb37..ca3e1cf 100644
--- a/regressions/ck_spinlock/benchmark/Makefile
+++ b/regressions/ck_spinlock/benchmark/Makefile
@@ -9,7 +9,8 @@ OBJECTS=ck_ticket.THROUGHPUT ck_ticket.LATENCY \
 	linux_spinlock.THROUGHPUT linux_spinlock.LATENCY \
 	ck_ticket_pb.THROUGHPUT ck_ticket_pb.LATENCY \
 	ck_anderson.THROUGHPUT ck_anderson.LATENCY \
-	ck_spinlock.THROUGHPUT ck_spinlock.LATENCY
+	ck_spinlock.THROUGHPUT ck_spinlock.LATENCY \
+	ck_hclh.THROUGHPUT ck_hclh.LATENCY
 
 all: $(OBJECTS)
 
@@ -55,6 +56,12 @@ ck_clh.THROUGHPUT: ck_clh.c
 ck_clh.LATENCY: ck_clh.c
 	$(CC) -DLATENCY $(CFLAGS) -o ck_clh.LATENCY ck_clh.c -lm
 
+ck_hclh.THROUGHPUT: ck_hclh.c
+	$(CC) -DTHROUGHPUT $(CFLAGS) -o ck_hclh.THROUGHPUT ck_hclh.c -lm
+
+ck_hclh.LATENCY: ck_hclh.c
+	$(CC) -DLATENCY $(CFLAGS) -o ck_hclh.LATENCY ck_hclh.c -lm
+
 linux_spinlock.THROUGHPUT: linux_spinlock.c
 	$(CC) -DTHROUGHPUT $(CFLAGS) -o linux_spinlock.THROUGHPUT linux_spinlock.c -lm
diff --git a/regressions/ck_spinlock/benchmark/latency.h b/regressions/ck_spinlock/benchmark/latency.h
index 6ca84a0..8abc9fe 100644
--- a/regressions/ck_spinlock/benchmark/latency.h
+++ b/regressions/ck_spinlock/benchmark/latency.h
@@ -53,6 +53,7 @@ main(void)
 #endif
 
 	uint64_t s_b, e_b, i;
+	int core = 0;
 
 	s_b = rdtsc();
 	for (i = 0; i < STEPS; ++i) {
diff --git a/regressions/ck_spinlock/benchmark/throughput.h b/regressions/ck_spinlock/benchmark/throughput.h
index f2a8c63..c7271b2 100644
--- a/regressions/ck_spinlock/benchmark/throughput.h
+++ b/regressions/ck_spinlock/benchmark/throughput.h
@@ -70,6 +70,7 @@ LOCK_DEFINE;
 CK_CC_USED static void
 gen_lock(void)
 {
+	int core = 0;
 #ifdef LOCK_STATE
 	LOCK_STATE;
 #endif
@@ -101,8 +102,9 @@ fairness(void *null)
 	unsigned int i = context->tid;
 	volatile int j;
 	long int base;
+	unsigned int core;
 
-	if (aff_iterate(&a)) {
+	if (aff_iterate_core(&a, &core)) {
 		perror("ERROR: Could not affine thread");
 		exit(EXIT_FAILURE);
 	}
diff --git a/regressions/ck_spinlock/validate/Makefile b/regressions/ck_spinlock/validate/Makefile
index da37bae..731b68b 100644
--- a/regressions/ck_spinlock/validate/Makefile
+++ b/regressions/ck_spinlock/validate/Makefile
@@ -1,7 +1,7 @@
 .PHONY: check clean
 
 all: ck_ticket ck_mcs ck_dec ck_cas ck_fas ck_clh linux_spinlock \
-	ck_ticket_pb ck_anderson ck_spinlock
+	ck_ticket_pb ck_anderson ck_spinlock ck_hclh
 
 check: all
 	./ck_ticket $(CORES) 1
@@ -27,6 +27,9 @@ ck_ticket_pb: ck_ticket_pb.c
 ck_clh: ck_clh.c
 	$(CC) $(CFLAGS) -o ck_clh ck_clh.c
 
+ck_hclh: ck_hclh.c
+	$(CC) $(CFLAGS) -o ck_hclh ck_hclh.c
+
 ck_anderson: ck_anderson.c
 	$(CC) $(CFLAGS) -o ck_anderson ck_anderson.c
diff --git a/regressions/ck_spinlock/validate/validate.h b/regressions/ck_spinlock/validate/validate.h
index 42998ab..6c45d23 100644
--- a/regressions/ck_spinlock/validate/validate.h
+++ b/regressions/ck_spinlock/validate/validate.h
@@ -63,8 +63,9 @@ thread(void *null CK_CC_UNUSED)
 #endif
 	unsigned int i = ITERATE;
 	unsigned int j;
+	unsigned int core;
 
-	if (aff_iterate(&a)) {
+	if (aff_iterate_core(&a, &core)) {
 		perror("ERROR: Could not affine thread");
 		exit(EXIT_FAILURE);
 	}
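For reviewers, a minimal caller sketch (not part of the patch) may help. It assumes one
local queue per cluster plus a global queue, each seeded with its own unowned node via
ck_spinlock_hclh_init(). Names such as glob_lock, local_lock, MAX_CLUSTERS, hclh_setup
and hclh_critical_section are hypothetical; only the ck_spinlock_hclh_* calls come from
this change.

#include <stdlib.h>
#include <ck_spinlock.h>

#define MAX_CLUSTERS 4

static ck_spinlock_hclh_t *glob_lock;                /* tail of the global queue */
static ck_spinlock_hclh_t *local_lock[MAX_CLUSTERS]; /* per-cluster queue tails */

static void
hclh_setup(void)
{
	int i;

	/* Seed the global queue with an unowned node; -1 matches no cluster. */
	ck_spinlock_hclh_init(&glob_lock, malloc(sizeof(ck_spinlock_hclh_t)), -1);

	/* Seed each cluster's local queue with its own unowned node. */
	for (i = 0; i < MAX_CLUSTERS; i++) {
		ck_spinlock_hclh_init(&local_lock[i],
		    malloc(sizeof(ck_spinlock_hclh_t)), i);
	}
}

static void
hclh_critical_section(int cluster, ck_spinlock_hclh_t **me)
{

	/* Enqueue on the cluster's local queue; its tail may later be
	 * spliced onto the global queue by the lock operation. */
	ck_spinlock_hclh_lock(&glob_lock, &local_lock[cluster], *me);

	/* ... critical section ... */

	/* Unlock exchanges *me for the predecessor's node, CLH-style. */
	ck_spinlock_hclh_unlock(me);
}

As with CLH, each thread allocates one node up front (ck_spinlock_hclh_t *me =
malloc(...)) and passes &me to every acquisition; because unlock swaps in the
predecessor's node, the pointer must persist across lock/unlock pairs, for
example in thread-local storage. Error checking on malloc() is omitted for
brevity.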