diff --git a/include/ck_spinlock.h b/include/ck_spinlock.h
index 269d727..abd2a54 100644
--- a/include/ck_spinlock.h
+++ b/include/ck_spinlock.h
@@ -825,6 +825,107 @@ ck_spinlock_mcs_unlock(struct ck_spinlock_mcs **queue, struct ck_spinlock_mcs *node)
 }
 #endif /* CK_F_SPINLOCK_MCS */
 
+#ifndef CK_F_SPINLOCK_HCLH
+#define CK_F_SPINLOCK_HCLH
+struct ck_spinlock_hclh {
+	unsigned int wait;
+	unsigned int splice;
+	int cluster_id;
+	struct ck_spinlock_hclh *previous;
+};
+typedef struct ck_spinlock_hclh ck_spinlock_hclh_t;
+
+CK_CC_INLINE static void
+ck_spinlock_hclh_init(struct ck_spinlock_hclh **lock,
+    struct ck_spinlock_hclh *unowned,
+    int cluster_id)
+{
+
+	ck_pr_store_ptr(&unowned->previous, NULL);
+	ck_pr_store_uint(&unowned->wait, false);
+	ck_pr_store_uint(&unowned->splice, false);
+	ck_pr_store_int(&unowned->cluster_id, cluster_id);
+	ck_pr_store_ptr(lock, unowned);
+	ck_pr_fence_store();
+	return;
+}
+
+CK_CC_INLINE static bool
+ck_spinlock_hclh_locked(struct ck_spinlock_hclh **queue)
+{
+	struct ck_spinlock_hclh *head;
+
+	ck_pr_fence_load();
+	head = ck_pr_load_ptr(queue);
+	ck_pr_fence_load();
+	return ck_pr_load_uint(&head->wait);
+}
+
+CK_CC_INLINE static void
+ck_spinlock_hclh_lock(struct ck_spinlock_hclh **glob_queue,
+    struct ck_spinlock_hclh **local_queue,
+    struct ck_spinlock_hclh *thread)
+{
+	struct ck_spinlock_hclh *previous, *local_tail;
+
+	/* Indicate to the next thread on queue that they will have to block. */
+	ck_pr_store_uint(&thread->wait, true);
+	ck_pr_store_uint(&thread->splice, false);
+	thread->cluster_id = (*local_queue)->cluster_id;
+	ck_pr_fence_store();
+
+	/* Mark current request as last request. Save reference to previous request. */
+	previous = ck_pr_fas_ptr(local_queue, thread);
+	thread->previous = previous;
+
+	/* Wait until previous thread from the local queue is done with lock. */
+	ck_pr_fence_load();
+	if (previous->previous != NULL &&
+	    previous->cluster_id == thread->cluster_id) {
+		while (ck_pr_load_uint(&previous->wait) == true)
+			ck_pr_stall();
+
+		/* We're head of the global queue, we're done. */
+		if (!(ck_pr_load_uint(&previous->splice)))
+			return;
+	}
+
+	/* Now we need to splice the local queue into the global queue. */
+	local_tail = ck_pr_load_ptr(local_queue);
+	ck_pr_fence_load();
+	previous = ck_pr_fas_ptr(glob_queue, local_tail);
+	ck_pr_store_uint(&local_tail->splice, true);
+	ck_pr_fence_store();
+
+	/* Wait until previous thread from the global queue is done with lock. */
+	while (ck_pr_load_uint(&previous->wait) == true)
+		ck_pr_stall();
+
+	return;
+}
+
+CK_CC_INLINE static void
+ck_spinlock_hclh_unlock(struct ck_spinlock_hclh **thread)
+{
+	struct ck_spinlock_hclh *previous;
+
+	/*
+	 * If there are waiters, they are spinning on the current node's wait
+	 * flag. The flag is cleared so that the successor may complete an
+	 * acquisition. If the caller is pre-empted then the predecessor field
+	 * may be updated by a successor's lock operation. In order to avoid
+	 * this, save a copy of the predecessor before setting the flag.
+	 */
+	previous = thread[0]->previous;
+
+	/* We have to pay this cost anyways, use it as a compiler barrier too. */
+	ck_pr_fence_memory();
+	ck_pr_store_uint(&(*thread)->wait, false);
+
+	/*
+	 * Predecessor is guaranteed not to be spinning on previous request,
+	 * so update caller to use previous structure. This allows successor
+	 * all the time in the world to successfully read updated wait flag.
+	 */
+	*thread = previous;
+	return;
+}
+#endif /* CK_F_SPINLOCK_HCLH */
+
 #ifndef CK_F_SPINLOCK_CLH
 #define CK_F_SPINLOCK_CLH
diff --git a/regressions/ck_spinlock/benchmark/Makefile b/regressions/ck_spinlock/benchmark/Makefile
index 1afeb37..ca3e1cf 100644
--- a/regressions/ck_spinlock/benchmark/Makefile
+++ b/regressions/ck_spinlock/benchmark/Makefile
@@ -9,7 +9,8 @@ OBJECTS=ck_ticket.THROUGHPUT ck_ticket.LATENCY \
 	linux_spinlock.THROUGHPUT linux_spinlock.LATENCY \
 	ck_ticket_pb.THROUGHPUT ck_ticket_pb.LATENCY \
 	ck_anderson.THROUGHPUT ck_anderson.LATENCY \
-	ck_spinlock.THROUGHPUT ck_spinlock.LATENCY
+	ck_spinlock.THROUGHPUT ck_spinlock.LATENCY \
+	ck_hclh.THROUGHPUT ck_hclh.LATENCY
 
 all: $(OBJECTS)
 
@@ -55,6 +56,12 @@ ck_clh.THROUGHPUT: ck_clh.c
 ck_clh.LATENCY: ck_clh.c
 	$(CC) -DLATENCY $(CFLAGS) -o ck_clh.LATENCY ck_clh.c -lm
 
+ck_hclh.THROUGHPUT: ck_hclh.c
+	$(CC) -DTHROUGHPUT $(CFLAGS) -o ck_hclh.THROUGHPUT ck_hclh.c -lm
+
+ck_hclh.LATENCY: ck_hclh.c
+	$(CC) -DLATENCY $(CFLAGS) -o ck_hclh.LATENCY ck_hclh.c -lm
+
 linux_spinlock.THROUGHPUT: linux_spinlock.c
 	$(CC) -DTHROUGHPUT $(CFLAGS) -o linux_spinlock.THROUGHPUT linux_spinlock.c -lm
diff --git a/regressions/ck_spinlock/benchmark/latency.h b/regressions/ck_spinlock/benchmark/latency.h
index 6ca84a0..8abc9fe 100644
--- a/regressions/ck_spinlock/benchmark/latency.h
+++ b/regressions/ck_spinlock/benchmark/latency.h
@@ -53,6 +53,7 @@ main(void)
 #endif
 
 	uint64_t s_b, e_b, i;
+	int core = 0;
 
 	s_b = rdtsc();
 	for (i = 0; i < STEPS; ++i) {
diff --git a/regressions/ck_spinlock/benchmark/throughput.h b/regressions/ck_spinlock/benchmark/throughput.h
index f2a8c63..c7271b2 100644
--- a/regressions/ck_spinlock/benchmark/throughput.h
+++ b/regressions/ck_spinlock/benchmark/throughput.h
@@ -70,6 +70,7 @@ LOCK_DEFINE;
 CK_CC_USED static void
 gen_lock(void)
 {
+	int core = 0;
 #ifdef LOCK_STATE
 	LOCK_STATE;
 #endif
@@ -101,8 +102,9 @@ fairness(void *null)
 	unsigned int i = context->tid;
 	volatile int j;
 	long int base;
+	unsigned int core;
 
-	if (aff_iterate(&a)) {
+	if (aff_iterate_core(&a, &core)) {
 		perror("ERROR: Could not affine thread");
 		exit(EXIT_FAILURE);
 	}
diff --git a/regressions/ck_spinlock/validate/Makefile b/regressions/ck_spinlock/validate/Makefile
index da37bae..731b68b 100644
--- a/regressions/ck_spinlock/validate/Makefile
+++ b/regressions/ck_spinlock/validate/Makefile
@@ -1,7 +1,7 @@
 .PHONY: check clean
 
 all: ck_ticket ck_mcs ck_dec ck_cas ck_fas ck_clh linux_spinlock \
-	ck_ticket_pb ck_anderson ck_spinlock
+	ck_ticket_pb ck_anderson ck_spinlock ck_hclh
 
 check: all
 	./ck_ticket $(CORES) 1
@@ -27,6 +27,9 @@ ck_ticket_pb: ck_ticket_pb.c
 ck_clh: ck_clh.c
 	$(CC) $(CFLAGS) -o ck_clh ck_clh.c
 
+ck_hclh: ck_hclh.c
+	$(CC) $(CFLAGS) -o ck_hclh ck_hclh.c
+
 ck_anderson: ck_anderson.c
 	$(CC) $(CFLAGS) -o ck_anderson ck_anderson.c
diff --git a/regressions/ck_spinlock/validate/validate.h b/regressions/ck_spinlock/validate/validate.h
index 42998ab..6c45d23 100644
--- a/regressions/ck_spinlock/validate/validate.h
+++ b/regressions/ck_spinlock/validate/validate.h
@@ -63,8 +63,9 @@ thread(void *null CK_CC_UNUSED)
 #endif
 	unsigned int i = ITERATE;
 	unsigned int j;
+	unsigned int core;
 
-	if (aff_iterate(&a)) {
+	if (aff_iterate_core(&a, &core)) {
 		perror("ERROR: Could not affine thread");
 		exit(EXIT_FAILURE);
 	}
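For reviewers, a minimal caller sketch (not part of the patch) may help. It assumes one
local queue per cluster plus a global queue, each seeded with its own unowned node via
ck_spinlock_hclh_init(). Names such as glob_lock, local_lock, MAX_CLUSTERS, hclh_setup
and hclh_critical_section are hypothetical; only the ck_spinlock_hclh_* calls come from
this change.

#include <stdlib.h>
#include <ck_spinlock.h>

#define MAX_CLUSTERS 4

static ck_spinlock_hclh_t *glob_lock;                /* tail of the global queue */
static ck_spinlock_hclh_t *local_lock[MAX_CLUSTERS]; /* per-cluster queue tails */

static void
hclh_setup(void)
{
	int i;

	/* Seed the global queue with an unowned node; -1 matches no cluster. */
	ck_spinlock_hclh_init(&glob_lock, malloc(sizeof(ck_spinlock_hclh_t)), -1);

	/* Seed each cluster's local queue with its own unowned node. */
	for (i = 0; i < MAX_CLUSTERS; i++) {
		ck_spinlock_hclh_init(&local_lock[i],
		    malloc(sizeof(ck_spinlock_hclh_t)), i);
	}
}

static void
hclh_critical_section(int cluster, ck_spinlock_hclh_t **me)
{

	/* Enqueue on the cluster's local queue; its tail may later be
	 * spliced onto the global queue by the lock operation. */
	ck_spinlock_hclh_lock(&glob_lock, &local_lock[cluster], *me);

	/* ... critical section ... */

	/* Unlock exchanges *me for the predecessor's node, CLH-style. */
	ck_spinlock_hclh_unlock(me);
}

As with CLH, each thread allocates one node up front (ck_spinlock_hclh_t *me =
malloc(...)) and passes &me to every acquisition; because unlock swaps in the
predecessor's node, the pointer must persist across lock/unlock pairs, for
example in thread-local storage. Error checking on malloc() is omitted for
brevity.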