ck_pring
Brendon Scheinman 12 years ago
commit cbfa095108

.gitignore

@@ -140,6 +140,7 @@ regressions/ck_brlock/benchmark/throughput
regressions/ck_rwlock/benchmark/throughput
regressions/ck_queue/validate/ck_list
regressions/ck_queue/validate/ck_slist
regressions/ck_queue/validate/ck_stailq
regressions/ck_cohort/validate/validate
regressions/ck_cohort/benchmark/ck_cohort.LATENCY
regressions/ck_cohort/benchmark/ck_cohort.THROUGHPUT

@@ -83,7 +83,7 @@ ck_brlock_write_lock(struct ck_brlock *br)
while (ck_pr_fas_uint(&br->writer, true) == true)
ck_pr_stall();
ck_pr_fence_memory();
ck_pr_fence_atomic_load();
/* The reader list is protected under the writer br. */
for (cursor = br->readers; cursor != NULL; cursor = cursor->next) {
@@ -121,7 +121,7 @@ ck_brlock_write_trylock(struct ck_brlock *br, unsigned int factor)
* We do not require a strict fence here as atomic RMW operations
* are serializing.
*/
ck_pr_fence_memory();
ck_pr_fence_atomic_load();
for (cursor = br->readers; cursor != NULL; cursor = cursor->next) {
while (ck_pr_load_uint(&cursor->n_readers) != 0) {
@@ -190,13 +190,19 @@ ck_brlock_read_lock(struct ck_brlock *br, struct ck_brlock_reader *reader)
#if defined(__x86__) || defined(__x86_64__)
ck_pr_fas_uint(&reader->n_readers, 1);
/* Serialize counter update with respect to writer snapshot. */
ck_pr_fence_memory();
/*
* Serialize reader counter update with respect to load of
* writer.
*/
ck_pr_fence_atomic_load();
#else
ck_pr_store_uint(&reader->n_readers, 1);
/* Loads can be re-ordered before previous stores, even on TSO. */
ck_pr_fence_strict_memory();
/*
* Serialize reader counter update with respect to load of
* writer.
*/
ck_pr_fence_store_load();
#endif
if (ck_pr_load_uint(&br->writer) == false)
@@ -229,10 +235,23 @@ ck_brlock_read_trylock(struct ck_brlock *br,
ck_pr_stall();
}
#if defined(__x86__) || defined(__x86_64__)
ck_pr_fas_uint(&reader->n_readers, 1);
/*
* Serialize reader counter update with respect to load of
* writer.
*/
ck_pr_fence_atomic_load();
#else
ck_pr_store_uint(&reader->n_readers, 1);
/* Loads are re-ordered with respect to prior stores. */
ck_pr_fence_strict_memory();
/*
* Serialize reader counter update with respect to load of
* writer.
*/
ck_pr_fence_store_load();
#endif
if (ck_pr_load_uint(&br->writer) == false)
break;
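To illustrate the ordering these brlock hunks are after: the reader publishes its presence with an atomic RMW and then loads the writer flag, while the writer does the mirror image. The one reordering TSO permits, a later load overtaking an earlier store, is exactly the reordering that would let both sides miss each other, which is why an atomic-to-load (or store-to-load) fence sits between the two steps. A minimal sketch, using only ck_pr primitives that appear in this diff; the flag names are illustrative stand-ins for reader->n_readers and br->writer.

#include <ck_pr.h>
#include <stdbool.h>

static unsigned int n_readers;	/* stand-in for reader->n_readers */
static unsigned int writer;	/* stand-in for br->writer */

static bool
reader_enter(void)
{

	ck_pr_fas_uint(&n_readers, 1);	/* publish the reader (atomic RMW) */
	ck_pr_fence_atomic_load();	/* RMW is visible before the load below */

	/* true: no writer observed, the read-side section may proceed. */
	return ck_pr_load_uint(&writer) == 0;
}

static bool
writer_enter(void)
{

	ck_pr_fas_uint(&writer, 1);	/* publish the writer (atomic RMW) */
	ck_pr_fence_atomic_load();	/* RMW is visible before reading n_readers */

	/* true: no readers observed, the write-side section may proceed. */
	return ck_pr_load_uint(&n_readers) == 0;
}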

@@ -93,7 +93,7 @@ ck_bytelock_write_lock(struct ck_bytelock *bytelock, unsigned int slot)
ck_pr_store_8(&bytelock->readers[slot - 1], false);
/* Wait for slotted readers to drain out. */
ck_pr_fence_strict_memory();
ck_pr_fence_store_load();
for (i = 0; i < sizeof(bytelock->readers) / CK_BYTELOCK_LENGTH; i++) {
while (CK_BYTELOCK_LOAD((CK_BYTELOCK_TYPE *)&readers[i]) != false)
ck_pr_stall();
@@ -134,7 +134,7 @@ ck_bytelock_read_lock(struct ck_bytelock *bytelock, unsigned int slot)
if (slot > sizeof bytelock->readers) {
for (;;) {
ck_pr_inc_uint(&bytelock->n_readers);
ck_pr_fence_memory();
ck_pr_fence_atomic_load();
if (ck_pr_load_uint(&bytelock->owner) == 0)
break;
ck_pr_dec_uint(&bytelock->n_readers);
@@ -150,7 +150,7 @@ ck_bytelock_read_lock(struct ck_bytelock *bytelock, unsigned int slot)
slot -= 1;
for (;;) {
ck_pr_store_8(&bytelock->readers[slot], true);
ck_pr_fence_strict_memory();
ck_pr_fence_store_load();
/*
* If there is no owner at this point, our slot has

@@ -97,12 +97,11 @@ ck_epoch_begin(ck_epoch_t *epoch, ck_epoch_record_t *record)
/*
* It is possible for loads to be re-ordered before the store
* is committed into the caller's epoch and active fields.
* Execute a full barrier to serialize stores with respect to
* loads
* For this reason, store to load serialization is necessary.
*/
ck_pr_store_uint(&record->epoch, g_epoch);
ck_pr_store_uint(&record->active, 1);
ck_pr_fence_strict_memory();
ck_pr_fence_store_load();
return;
}
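The rewritten comment states the requirement; concretely, the loads performed inside the read-side section must not be satisfied before the stores to record->epoch and record->active are visible, or a thread scanning records for a grace period could see the record as inactive and reclaim memory the reader is about to dereference. An annotated sketch of the same sequence (the scanning side is described only in the comment, since it is not part of this hunk):

ck_pr_store_uint(&record->epoch, g_epoch);	/* announce the observed epoch */
ck_pr_store_uint(&record->active, 1);		/* announce the read-side section */
ck_pr_fence_store_load();			/* both stores precede any later load */
/*
 * From here on, loads of epoch-protected objects are safe: a concurrent
 * scan of records either observes active == 1, or this thread's loads
 * observe state written after that scan.
 */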

@@ -81,7 +81,7 @@ ck_hp_fifo_enqueue_mpmc(ck_hp_record_t *record,
for (;;) {
tail = ck_pr_load_ptr(&fifo->tail);
ck_hp_set(record, 0, tail);
ck_pr_fence_strict_memory();
ck_pr_fence_store_load();
if (tail != ck_pr_load_ptr(&fifo->tail))
continue;
@@ -112,7 +112,7 @@ ck_hp_fifo_tryenqueue_mpmc(ck_hp_record_t *record,
tail = ck_pr_load_ptr(&fifo->tail);
ck_hp_set(record, 0, tail);
ck_pr_fence_strict_memory();
ck_pr_fence_store_load();
if (tail != ck_pr_load_ptr(&fifo->tail))
return false;
@@ -140,13 +140,13 @@ ck_hp_fifo_dequeue_mpmc(ck_hp_record_t *record,
ck_pr_fence_load();
tail = ck_pr_load_ptr(&fifo->tail);
ck_hp_set(record, 0, head);
ck_pr_fence_strict_memory();
ck_pr_fence_store_load();
if (head != ck_pr_load_ptr(&fifo->head))
continue;
next = ck_pr_load_ptr(&head->next);
ck_hp_set(record, 1, next);
ck_pr_fence_strict_memory();
ck_pr_fence_store_load();
if (head != ck_pr_load_ptr(&fifo->head))
continue;
@@ -175,13 +175,13 @@ ck_hp_fifo_trydequeue_mpmc(ck_hp_record_t *record,
ck_pr_fence_load();
tail = ck_pr_load_ptr(&fifo->tail);
ck_hp_set(record, 0, head);
ck_pr_fence_strict_memory();
ck_pr_fence_store_load();
if (head != ck_pr_load_ptr(&fifo->head))
return NULL;
next = ck_pr_load_ptr(&head->next);
ck_hp_set(record, 1, next);
ck_pr_fence_strict_memory();
ck_pr_fence_store_load();
if (head != ck_pr_load_ptr(&fifo->head))
return NULL;
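Every path in this file follows the same publish-then-revalidate protocol, and the fence being renamed here is what makes the revalidation meaningful: announcing the hazard pointer is a store, re-checking the source slot is a load, so store-to-load is the minimum ordering that guarantees a reclaimer either sees the announcement or the reader sees the slot change and retries. A condensed sketch of that loop; hp_protect is an illustrative helper, not part of CK:

#include <ck_hp.h>
#include <ck_pr.h>

static void *
hp_protect(ck_hp_record_t *record, void **slot)
{
	void *p;

	for (;;) {
		p = ck_pr_load_ptr(slot);	/* candidate pointer */
		ck_hp_set(record, 0, p);	/* announce it as hazardous */
		ck_pr_fence_store_load();	/* announcement precedes the re-check */
		if (p == ck_pr_load_ptr(slot))
			return p;		/* still current: protected */
	}
}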

@@ -62,7 +62,7 @@ ck_hp_stack_pop_mpmc(ck_hp_record_t *record, struct ck_stack *target)
return NULL;
ck_hp_set(record, 0, entry);
ck_pr_fence_strict_memory();
ck_pr_fence_store_load();
} while (entry != ck_pr_load_ptr(&target->head));
while (ck_pr_cas_ptr_value(&target->head, entry, entry->next, &entry) == false) {
@@ -70,11 +70,11 @@ ck_hp_stack_pop_mpmc(ck_hp_record_t *record, struct ck_stack *target)
return NULL;
ck_hp_set(record, 0, entry);
ck_pr_fence_strict_memory();
ck_pr_fence_store_load();
update = ck_pr_load_ptr(&target->head);
while (entry != update) {
ck_hp_set(record, 0, update);
ck_pr_fence_strict_memory();
ck_pr_fence_store_load();
entry = update;
update = ck_pr_load_ptr(&target->head);
if (update == NULL)
@@ -95,7 +95,7 @@ ck_hp_stack_trypop_mpmc(ck_hp_record_t *record, struct ck_stack *target, struct
return false;
ck_hp_set(record, 0, entry);
ck_pr_fence_strict_memory();
ck_pr_fence_store_load();
if (entry != ck_pr_load_ptr(&target->head))
goto leave;

@@ -30,6 +30,7 @@
#include <ck_cc.h>
#include <ck_limits.h>
#include <ck_md.h>
#include <ck_stdint.h>
#include <stdbool.h>
@@ -43,12 +44,97 @@
#include "gcc/ppc64/ck_pr.h"
#elif defined(__ppc__)
#include "gcc/ppc/ck_pr.h"
#elif defined(__GNUC__)
#include "gcc/ck_pr.h"
#else
#elif !defined(__GNUC__)
#error Your platform is unsupported
#endif
#if defined(__GNUC__)
#include "gcc/ck_pr.h"
#endif
#define CK_PR_FENCE_EMIT(T) \
CK_CC_INLINE static void \
ck_pr_fence_##T(void) \
{ \
ck_pr_fence_strict_##T(); \
}
#define CK_PR_FENCE_NOOP(T) \
CK_CC_INLINE static void \
ck_pr_fence_##T(void) \
{ \
return; \
}
/*
* None of the currently supported platforms allow for data-dependent
* load ordering.
*/
CK_PR_FENCE_NOOP(load_depends)
#define ck_pr_fence_strict_load_depends ck_pr_fence_load_depends
/*
* In memory models where atomic operations do not have serializing
* effects, atomic read-modify-write operations are modeled as stores.
*/
#if defined(CK_MD_RMO)
/*
* Only stores to the same location have a global
* ordering.
*/
CK_PR_FENCE_EMIT(atomic)
CK_PR_FENCE_EMIT(atomic_atomic)
CK_PR_FENCE_EMIT(atomic_load)
CK_PR_FENCE_EMIT(atomic_store)
CK_PR_FENCE_EMIT(store_atomic)
CK_PR_FENCE_EMIT(load_atomic)
CK_PR_FENCE_EMIT(load_load)
CK_PR_FENCE_EMIT(load_store)
CK_PR_FENCE_EMIT(store_store)
CK_PR_FENCE_EMIT(store_load)
CK_PR_FENCE_EMIT(load)
CK_PR_FENCE_EMIT(store)
CK_PR_FENCE_EMIT(memory)
#elif defined(CK_MD_PSO)
/*
* Anything can be re-ordered with respect to stores.
* Otherwise, loads are executed in-order.
*/
CK_PR_FENCE_EMIT(atomic)
CK_PR_FENCE_EMIT(atomic_atomic)
CK_PR_FENCE_NOOP(atomic_load)
CK_PR_FENCE_EMIT(atomic_store)
CK_PR_FENCE_EMIT(store_atomic)
CK_PR_FENCE_NOOP(load_atomic)
CK_PR_FENCE_NOOP(load_load)
CK_PR_FENCE_EMIT(load_store)
CK_PR_FENCE_EMIT(store_store)
CK_PR_FENCE_EMIT(store_load)
CK_PR_FENCE_NOOP(load)
CK_PR_FENCE_EMIT(store)
CK_PR_FENCE_EMIT(memory)
#elif defined(CK_MD_TSO)
/*
* Only loads are re-ordered and only with respect to
* prior stores. Atomic operations are serializing.
*/
CK_PR_FENCE_NOOP(atomic)
CK_PR_FENCE_NOOP(atomic_atomic)
CK_PR_FENCE_NOOP(atomic_load)
CK_PR_FENCE_NOOP(atomic_store)
CK_PR_FENCE_NOOP(store_atomic)
CK_PR_FENCE_NOOP(load_atomic)
CK_PR_FENCE_NOOP(load_load)
CK_PR_FENCE_NOOP(load_store)
CK_PR_FENCE_NOOP(store_store)
CK_PR_FENCE_EMIT(store_load)
CK_PR_FENCE_NOOP(load)
CK_PR_FENCE_NOOP(store)
CK_PR_FENCE_NOOP(memory)
#endif /* CK_MD_TSO */
#undef CK_PR_FENCE_EMIT
#undef CK_PR_FENCE_NOOP
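The net effect of the two macros is that every ck_pr_fence_X_Y() is either an alias for its strict counterpart (when the selected memory model can actually reorder X with respect to Y) or an empty inline that vanishes at compile time. For example, under CK_MD_TSO the table above expands to, in sketch form:

CK_CC_INLINE static void
ck_pr_fence_store_load(void)
{

	ck_pr_fence_strict_store_load();	/* the one reordering TSO allows */
}

CK_CC_INLINE static void
ck_pr_fence_load_load(void)
{

	return;					/* load->load is already ordered */
}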
#define CK_PR_BIN(K, S, M, T, P, C) \
CK_CC_INLINE static void \
ck_pr_##K##_##S(M *target, T value) \

@@ -74,7 +74,8 @@ ck_rwlock_write_trylock(ck_rwlock_t *rw)
if (ck_pr_fas_uint(&rw->writer, 1) != 0)
return false;
ck_pr_fence_memory();
ck_pr_fence_atomic_load();
if (ck_pr_load_uint(&rw->n_readers) != 0) {
ck_rwlock_write_unlock(rw);
return false;
@@ -90,7 +91,7 @@ ck_rwlock_write_lock(ck_rwlock_t *rw)
while (ck_pr_fas_uint(&rw->writer, 1) != 0)
ck_pr_stall();
ck_pr_fence_memory();
ck_pr_fence_atomic_load();
while (ck_pr_load_uint(&rw->n_readers) != 0)
ck_pr_stall();
@@ -111,16 +112,15 @@ ck_rwlock_read_trylock(ck_rwlock_t *rw)
* Serialize with respect to concurrent write
* lock operation.
*/
ck_pr_fence_memory();
if (ck_pr_load_uint(&rw->writer) == 0)
goto leave;
ck_pr_fence_atomic_load();
if (ck_pr_load_uint(&rw->writer) == 0) {
ck_pr_fence_load();
return true;
}
ck_pr_dec_uint(&rw->n_readers);
return false;
leave:
/* Acquire semantics are necessary. */
ck_pr_fence_load();
return true;
}
CK_CC_INLINE static void
@@ -137,7 +137,8 @@ ck_rwlock_read_lock(ck_rwlock_t *rw)
* Serialize with respect to concurrent write
* lock operation.
*/
ck_pr_fence_memory();
ck_pr_fence_atomic_load();
if (ck_pr_load_uint(&rw->writer) == 0)
break;
ck_pr_dec_uint(&rw->n_readers);
@@ -180,7 +181,7 @@ ck_rwlock_recursive_write_lock(ck_rwlock_recursive_t *rw, unsigned int tid)
while (ck_pr_cas_uint(&rw->rw.writer, 0, tid) == false)
ck_pr_stall();
ck_pr_fence_memory();
ck_pr_fence_atomic_load();
while (ck_pr_load_uint(&rw->rw.n_readers) != 0)
ck_pr_stall();
@@ -202,7 +203,7 @@ ck_rwlock_recursive_write_trylock(ck_rwlock_recursive_t *rw, unsigned int tid)
if (ck_pr_cas_uint(&rw->rw.writer, 0, tid) == false)
return false;
ck_pr_fence_memory();
ck_pr_fence_atomic_load();
if (ck_pr_load_uint(&rw->rw.n_readers) != 0) {
ck_pr_store_uint(&rw->rw.writer, 0);
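The substitutions in this file all have the same shape: the operation immediately before the fence is an atomic RMW (fas or cas) and the only thing that must be ordered after it is the load of the opposing reader count or writer flag, so ck_pr_fence_atomic_load() suffices where ck_pr_fence_memory() was stronger than needed. Annotated as a sketch, the write-side pattern reads:

while (ck_pr_fas_uint(&rw->writer, 1) != 0)	/* atomic RMW announces the writer */
	ck_pr_stall();

ck_pr_fence_atomic_load();			/* TSO: no-op, the RMW serializes   */
						/* PSO/RMO: a real fence is emitted */

while (ck_pr_load_uint(&rw->n_readers) != 0)	/* now safe to wait on readers */
	ck_pr_stall();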

@@ -142,7 +142,7 @@ ck_spinlock_anderson_lock(struct ck_spinlock_anderson *lock,
/* Prepare slot for potential re-use by another thread. */
ck_pr_store_uint(&lock->slots[position].locked, true);
ck_pr_fence_store();
ck_pr_fence_memory();
*slot = lock->slots + position;
return;
@@ -194,7 +194,7 @@ ck_spinlock_fas_trylock(struct ck_spinlock_fas *lock)
if (value == false)
ck_pr_fence_memory();
return (!value);
return !value;
}
CK_CC_INLINE static bool
@@ -268,7 +268,7 @@ ck_spinlock_cas_trylock(struct ck_spinlock_cas *lock)
if (value == false)
ck_pr_fence_memory();
return (!value);
return !value;
}
CK_CC_INLINE static bool
@@ -658,9 +658,9 @@ CK_CC_INLINE static bool
ck_spinlock_mcs_trylock(struct ck_spinlock_mcs **queue, struct ck_spinlock_mcs *node)
{
ck_pr_store_uint(&node->locked, true);
ck_pr_store_ptr(&node->next, NULL);
ck_pr_fence_store();
node->locked = true;
node->next = NULL;
ck_pr_fence_store_atomic();
if (ck_pr_cas_ptr(queue, NULL, node) == true) {
ck_pr_fence_load();
@@ -686,24 +686,24 @@ ck_spinlock_mcs_lock(struct ck_spinlock_mcs **queue, struct ck_spinlock_mcs *nod
* In the case that there is a successor, let them know they must wait
* for us to unlock.
*/
ck_pr_store_uint(&node->locked, true);
ck_pr_store_ptr(&node->next, NULL);
node->locked = true;
node->next = NULL;
ck_pr_fence_store_atomic();
/*
* Swap current tail with current lock request. If the swap operation
* returns NULL, it means the queue was empty. If the queue was empty,
* then the operation is complete.
*/
ck_pr_fence_memory();
previous = ck_pr_fas_ptr(queue, node);
if (previous == NULL)
return;
/* Let the previous lock holder know that we are waiting on them. */
ck_pr_store_ptr(&previous->next, node);
while (ck_pr_load_uint(&node->locked) == true)
ck_pr_stall();
if (previous != NULL) {
/* Let the previous lock holder know that we are waiting on them. */
ck_pr_store_ptr(&previous->next, node);
while (ck_pr_load_uint(&node->locked) == true)
ck_pr_stall();
}
ck_pr_fence_load();
return;
}
@@ -712,6 +712,8 @@ ck_spinlock_mcs_unlock(struct ck_spinlock_mcs **queue, struct ck_spinlock_mcs *n
{
struct ck_spinlock_mcs *next;
ck_pr_fence_memory();
next = ck_pr_load_ptr(&node->next);
if (next == NULL) {
/*
@@ -721,7 +723,6 @@ ck_spinlock_mcs_unlock(struct ck_spinlock_mcs **queue, struct ck_spinlock_mcs *n
*/
if (ck_pr_load_ptr(queue) == node &&
ck_pr_cas_ptr(queue, node, NULL) == true) {
ck_pr_fence_memory();
return;
}
@@ -740,9 +741,7 @@ ck_spinlock_mcs_unlock(struct ck_spinlock_mcs **queue, struct ck_spinlock_mcs *n
}
/* Allow the next lock operation to complete. */
ck_pr_fence_memory();
ck_pr_store_uint(&next->locked, false);
return;
}
#endif /* CK_F_SPINLOCK_MCS */
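For context on the restructured lock path: an MCS lock threads waiters through caller-supplied nodes, so each acquisition provides a node that must remain live until the matching unlock; a per-acquisition stack allocation is the usual idiom. A usage sketch under that assumption, using only the types and functions visible in this hunk:

#include <ck_spinlock.h>

static struct ck_spinlock_mcs *queue;	/* NULL while the lock is free */
static unsigned long counter;

static void
counter_increment(void)
{
	struct ck_spinlock_mcs node;	/* this acquisition's queue entry */

	ck_spinlock_mcs_lock(&queue, &node);
	counter++;			/* critical section */
	ck_spinlock_mcs_unlock(&queue, &node);
}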

@@ -31,9 +31,21 @@
#error Do not include this file directly, use ck_pr.h
#endif
#include <ck_cc.h>
CK_CC_INLINE static void
ck_pr_barrier(void)
{
__asm__ __volatile__("" ::: "memory");
return;
}
#ifndef CK_F_PR
#define CK_F_PR
#include <stdbool.h>
#include <ck_stdint.h>
#include <ck_cc.h>
/*
* The following represent supported atomic operations.
@@ -93,45 +105,32 @@ ck_pr_stall(void)
return;
}
/*
* Most target architectures do not require this.
*/
CK_CC_INLINE static void
ck_pr_fence_load_depends(void)
{
__sync_synchronize();
return;
}
/*
* Load and store fences are equivalent to full fences in the GCC port.
*/
#define CK_PR_FENCE(T) \
CK_CC_INLINE static void \
ck_pr_fence_strict_##T(void) \
{ \
__sync_synchronize(); \
} \
CK_CC_INLINE static void ck_pr_fence_##T(void) \
{ \
__sync_synchronize(); \
}
CK_PR_FENCE(atomic)
CK_PR_FENCE(atomic_atomic)
CK_PR_FENCE(atomic_load)
CK_PR_FENCE(atomic_store)
CK_PR_FENCE(store_atomic)
CK_PR_FENCE(load_atomic)
CK_PR_FENCE(load)
CK_PR_FENCE(load_load)
CK_PR_FENCE(load_store)
CK_PR_FENCE(store)
CK_PR_FENCE(store_store)
CK_PR_FENCE(store_load)
CK_PR_FENCE(memory)
#undef CK_PR_FENCE
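As the comment above notes, every fence in this generic port is a full barrier regardless of granularity; the fine-grained names only become cheaper in the architecture-specific ports. For one instance, the macro expands to:

CK_CC_INLINE static void
ck_pr_fence_strict_store_load(void)
{
	__sync_synchronize();
}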
CK_CC_INLINE static void
ck_pr_barrier(void)
{
__asm__ __volatile__("" ::: "memory");
return;
}
/*
* Atomic compare and swap.
*/
@@ -275,5 +274,5 @@ CK_PR_UNARY_S(8, uint8_t)
#undef CK_PR_UNARY_S
#undef CK_PR_UNARY
#endif /* !CK_F_PR */
#endif /* _CK_PR_GCC_H */

@@ -41,6 +41,11 @@
*/
#include "ck_f_pr.h"
/*
* Minimum interface requirement met.
*/
#define CK_F_PR
/*
* This bounces the hardware thread from low to medium
* priority. I am unsure of the benefits of this approach
@@ -55,45 +60,29 @@ ck_pr_stall(void)
return;
}
#if defined(CK_MD_RMO) || defined(CK_MD_PSO)
#define CK_PR_FENCE(T, I) \
CK_CC_INLINE static void \
ck_pr_fence_strict_##T(void) \
{ \
__asm__ __volatile__(I ::: "memory"); \
} \
CK_CC_INLINE static void ck_pr_fence_##T(void) \
{ \
__asm__ __volatile__(I ::: "memory"); \
}
#else
#define CK_PR_FENCE(T, I) \
CK_CC_INLINE static void \
ck_pr_fence_strict_##T(void) \
{ \
__asm__ __volatile__(I ::: "memory"); \
} \
CK_CC_INLINE static void ck_pr_fence_##T(void) \
{ \
__asm__ __volatile__("" ::: "memory"); \
}
#endif /* !CK_MD_RMO && !CK_MD_PSO */
CK_PR_FENCE(load_depends, "")
#define CK_PR_FENCE(T, I) \
CK_CC_INLINE static void \
ck_pr_fence_strict_##T(void) \
{ \
__asm__ __volatile__(I ::: "memory"); \
}
CK_PR_FENCE(atomic, "lwsync")
CK_PR_FENCE(atomic_atomic, "lwsync")
CK_PR_FENCE(atomic_store, "lwsync")
CK_PR_FENCE(atomic_load, "sync")
CK_PR_FENCE(store_atomic, "lwsync")
CK_PR_FENCE(load_atomic, "lwsync")
CK_PR_FENCE(store, "lwsync")
CK_PR_FENCE(store_store, "lwsync")
CK_PR_FENCE(store_load, "sync")
CK_PR_FENCE(load, "lwsync")
CK_PR_FENCE(load_load, "lwsync")
CK_PR_FENCE(load_store, "lwsync")
CK_PR_FENCE(memory, "sync")
#undef CK_PR_FENCE
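The instruction selection above follows from what lwsync does and does not order: it covers load->load, load->store and store->store, but not store->load, so the orderings that contain a store-then-load edge (store_load, atomic_load, memory) pay for a full sync while the rest use lwsync. Two expansions of the macro, for contrast:

CK_CC_INLINE static void
ck_pr_fence_strict_store_store(void)
{
	__asm__ __volatile__("lwsync" ::: "memory");
}

CK_CC_INLINE static void
ck_pr_fence_strict_store_load(void)
{
	__asm__ __volatile__("sync" ::: "memory");
}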
CK_CC_INLINE static void
ck_pr_barrier(void)
{
__asm__ __volatile__("" ::: "memory");
return;
}
#define CK_PR_LOAD(S, M, T, C, I) \
CK_CC_INLINE static T \
ck_pr_load_##S(const M *target) \

@@ -40,6 +40,11 @@
*/
#include "ck_f_pr.h"
/*
* Minimum interface requirement met.
*/
#define CK_F_PR
/*
* This bounces the hardware thread from low to medium
* priority. I am unsure of the benefits of this approach
@@ -54,49 +59,33 @@ ck_pr_stall(void)
return;
}
#if defined(CK_MD_RMO) || defined(CK_MD_PSO)
#define CK_PR_FENCE(T, I) \
CK_CC_INLINE static void \
ck_pr_fence_strict_##T(void) \
{ \
__asm__ __volatile__(I ::: "memory"); \
} \
CK_CC_INLINE static void ck_pr_fence_##T(void) \
{ \
__asm__ __volatile__(I ::: "memory"); \
}
#else
#define CK_PR_FENCE(T, I) \
CK_CC_INLINE static void \
ck_pr_fence_strict_##T(void) \
{ \
__asm__ __volatile__(I ::: "memory"); \
} \
CK_CC_INLINE static void ck_pr_fence_##T(void) \
{ \
__asm__ __volatile__("" ::: "memory"); \
}
#endif /* !CK_MD_RMO && !CK_MD_PSO */
#define CK_PR_FENCE(T, I) \
CK_CC_INLINE static void \
ck_pr_fence_strict_##T(void) \
{ \
__asm__ __volatile__(I ::: "memory"); \
}
/*
* These are derived from:
* http://www.ibm.com/developerworks/systems/articles/powerpc.html
*/
CK_PR_FENCE(load_depends, "")
CK_PR_FENCE(atomic, "lwsync")
CK_PR_FENCE(atomic_atomic, "lwsync")
CK_PR_FENCE(atomic_store, "lwsync")
CK_PR_FENCE(atomic_load, "sync")
CK_PR_FENCE(store_atomic, "lwsync")
CK_PR_FENCE(load_atomic, "lwsync")
CK_PR_FENCE(store, "lwsync")
CK_PR_FENCE(store_store, "lwsync")
CK_PR_FENCE(store_load, "sync")
CK_PR_FENCE(load, "lwsync")
CK_PR_FENCE(load_load, "lwsync")
CK_PR_FENCE(load_store, "lwsync")
CK_PR_FENCE(memory, "sync")
#undef CK_PR_FENCE
CK_CC_INLINE static void
ck_pr_barrier(void)
{
__asm__ __volatile__("" ::: "memory");
return;
}
#define CK_PR_LOAD(S, M, T, C, I) \
CK_CC_INLINE static T \
ck_pr_load_##S(const M *target) \

@@ -40,6 +40,11 @@
*/
#include "ck_f_pr.h"
/*
* Minimum interface requirement met.
*/
#define CK_F_PR
/*
* Order loads at the least.
*/
@@ -51,51 +56,33 @@ ck_pr_stall(void)
return;
}
#if defined(CK_MD_RMO) || defined(CK_MD_PSO)
/*
* If RMO is forced, then do not assume TSO model.
*/
#define CK_PR_FENCE(T, I) \
CK_CC_INLINE static void \
ck_pr_fence_strict_##T(void) \
{ \
__asm__ __volatile__(I ::: "memory"); \
} \
CK_CC_INLINE static void ck_pr_fence_##T(void) \
{ \
__asm__ __volatile__(I ::: "memory"); \
}
#else
#define CK_PR_FENCE(T, I) \
CK_CC_INLINE static void \
ck_pr_fence_strict_##T(void) \
{ \
__asm__ __volatile__(I ::: "memory"); \
}
/*
* By default, we will assume TSO model is used on SPARCv9.
* Atomic operations are treated as both load and store
* operations on SPARCv9.
*/
#define CK_PR_FENCE(T, I) \
CK_CC_INLINE static void \
ck_pr_fence_strict_##T(void) \
{ \
__asm__ __volatile__(I ::: "memory"); \
} \
CK_CC_INLINE static void ck_pr_fence_##T(void) \
{ \
__asm__ __volatile__("" ::: "memory"); \
}
#endif /* !CK_MD_RMO && !CK_MD_PSO */
CK_PR_FENCE(load_depends, "")
CK_PR_FENCE(atomic_atomic, "membar #StoreStore")
CK_PR_FENCE(atomic, "membar #StoreStore")
CK_PR_FENCE(atomic_store, "membar #StoreStore")
CK_PR_FENCE(atomic_load, "membar #StoreLoad")
CK_PR_FENCE(store_atomic, "membar #StoreStore")
CK_PR_FENCE(load_atomic, "membar #LoadStore")
CK_PR_FENCE(store, "membar #StoreStore")
CK_PR_FENCE(store_store, "membar #StoreStore")
CK_PR_FENCE(store_load, "membar #StoreLoad")
CK_PR_FENCE(load, "membar #LoadLoad")
CK_PR_FENCE(load_load, "membar #LoadLoad")
CK_PR_FENCE(load_store, "membar #LoadStore")
CK_PR_FENCE(memory, "membar #LoadLoad | #LoadStore | #StoreStore | #StoreLoad")
#undef CK_PR_FENCE
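Given the comment above, that an atomic operation is treated as both a load and a store on SPARCv9, the atomic_* mappings fall out directly: ordering an RMW before a subsequent load is the store->load case, hence #StoreLoad, and ordering a load before a subsequent RMW is load->store, hence #LoadStore. One expansion as a sketch:

CK_CC_INLINE static void
ck_pr_fence_strict_atomic_load(void)
{
	__asm__ __volatile__("membar #StoreLoad" ::: "memory");
}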
CK_CC_INLINE static void
ck_pr_barrier(void)
{
__asm__ __volatile__("" ::: "memory");
return;
}
#define CK_PR_LOAD(S, M, T, C, I) \
CK_CC_INLINE static T \
ck_pr_load_##S(const M *target) \

@@ -63,52 +63,29 @@ ck_pr_stall(void)
return;
}
#if defined(CK_MD_RMO) || defined(CK_MD_PSO)
#define CK_PR_FENCE(T, I) \
CK_CC_INLINE static void \
ck_pr_fence_strict_##T(void) \
{ \
__asm__ __volatile__(I ::: "memory"); \
} \
CK_CC_INLINE static void ck_pr_fence_##T(void) \
{ \
__asm__ __volatile__(I ::: "memory"); \
}
#else
/*
* IA32 has strong memory ordering guarantees, so memory
* fences are enabled if and only if the user specifies that
* the program will be using non-temporal instructions.
* Otherwise, an optimization barrier is used in order to prevent
* compiler re-ordering of loads and stores across the barrier.
*/
#define CK_PR_FENCE(T, I) \
CK_CC_INLINE static void \
ck_pr_fence_strict_##T(void) \
{ \
__asm__ __volatile__(I ::: "memory"); \
} \
CK_CC_INLINE static void ck_pr_fence_##T(void) \
{ \
__asm__ __volatile__("" ::: "memory"); \
}
#endif /* !CK_MD_RMO && !CK_MD_PSO */
CK_PR_FENCE(atomic, "sfence")
CK_PR_FENCE(atomic_atomic, "sfence")
CK_PR_FENCE(atomic_store, "sfence")
CK_PR_FENCE(atomic_load, "mfence")
CK_PR_FENCE(store_atomic, "sfence")
CK_PR_FENCE(load_atomic, "mfence")
CK_PR_FENCE(load, "lfence")
CK_PR_FENCE(load_depends, "")
CK_PR_FENCE(load_load, "lfence")
CK_PR_FENCE(load_store, "mfence")
CK_PR_FENCE(store, "sfence")
CK_PR_FENCE(store_store, "sfence")
CK_PR_FENCE(store_load, "mfence")
CK_PR_FENCE(memory, "mfence")
#undef CK_PR_FENCE
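On x86-64 the only reordering the hardware performs is a later load overtaking an earlier store, so the store_load ordering (and the combinations that contain it) is the case that genuinely needs mfence; pure store orderings map to sfence and pure load orderings to lfence, as listed above. The corresponding expansion, as a sketch:

CK_CC_INLINE static void
ck_pr_fence_strict_store_load(void)
{
	__asm__ __volatile__("mfence" ::: "memory");
}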
CK_CC_INLINE static void
ck_pr_barrier(void)
{
__asm__ __volatile__("" ::: "memory");
return;
}
/*
* Atomic fetch-and-store operations.
*/

@@ -62,52 +62,27 @@ ck_pr_stall(void)
return;
}
#if defined(CK_MD_RMO) || defined(CK_MD_PSO)
#define CK_PR_FENCE(T, I) \
CK_CC_INLINE static void \
ck_pr_fence_strict_##T(void) \
{ \
__asm__ __volatile__(I ::: "memory"); \
} \
CK_CC_INLINE static void ck_pr_fence_##T(void) \
{ \
__asm__ __volatile__(I ::: "memory"); \
}
#else
/*
* IA32 has strong memory ordering guarantees, so memory
* fences are enabled if and only if the user specifies that
* the program will be using non-temporal instructions.
* Otherwise, an optimization barrier is used in order to prevent
* compiler re-ordering of loads and stores across the barrier.
*/
#define CK_PR_FENCE(T, I) \
CK_CC_INLINE static void \
ck_pr_fence_strict_##T(void) \
{ \
__asm__ __volatile__(I ::: "memory"); \
} \
CK_CC_INLINE static void ck_pr_fence_##T(void) \
{ \
__asm__ __volatile__("" ::: "memory"); \
}
#endif /* !CK_MD_RMO && !CK_MD_PSO */
CK_PR_FENCE(atomic_store, "sfence")
CK_PR_FENCE(atomic_load, "mfence")
CK_PR_FENCE(store_atomic, "sfence")
CK_PR_FENCE(load_atomic, "mfence")
CK_PR_FENCE(load, "lfence")
CK_PR_FENCE(load_depends, "")
CK_PR_FENCE(load_load, "lfence")
CK_PR_FENCE(load_store, "mfence")
CK_PR_FENCE(store, "sfence")
CK_PR_FENCE(store_store, "sfence")
CK_PR_FENCE(store_load, "mfence")
CK_PR_FENCE(memory, "mfence")
#undef CK_PR_FENCE
CK_CC_INLINE static void
ck_pr_barrier(void)
{
__asm__ __volatile__("" ::: "memory");
return;
}
/*
* Atomic fetch-and-store operations.
*/

@@ -3,16 +3,16 @@
all: ck_pr_cas_64 ck_pr_fas_64 ck_pr_cas_64_2
ck_pr_cas_64_2: ck_pr_cas_64_2.c
$(CC) $(CFLAGS) -o ck_pr_cas_64_2 ck_pr_cas_64_2.c
$(CC) $(CFLAGS) -o ck_pr_cas_64_2 ck_pr_cas_64_2.c -lm
ck_pr_cas_64: ck_pr_cas_64.c
$(CC) $(CFLAGS) -o ck_pr_cas_64 ck_pr_cas_64.c
$(CC) $(CFLAGS) -o ck_pr_cas_64 ck_pr_cas_64.c -lm
ck_pr_fas_64: ck_pr_fas_64.c
$(CC) $(CFLAGS) -o ck_pr_fas_64 ck_pr_fas_64.c
$(CC) $(CFLAGS) -o ck_pr_fas_64 ck_pr_fas_64.c -lm
clean:
rm -rf ck_pr_cas_64 ck_pr_fas_64 ck_pr_cas_64_2 *.dSYM *.exe
include ../../../build/regressions.build
CFLAGS+=$(PTHREAD_CFLAGS) -D_GNU_SOURCE -lm
CFLAGS+=$(PTHREAD_CFLAGS) -D_GNU_SOURCE

@@ -14,67 +14,67 @@ OBJECTS=ck_ticket.THROUGHPUT ck_ticket.LATENCY \
all: $(OBJECTS)
ck_spinlock.THROUGHPUT: ck_spinlock.c
$(CC) -DTHROUGHPUT $(CFLAGS) -o ck_spinlock.THROUGHPUT ck_spinlock.c
$(CC) -DTHROUGHPUT $(CFLAGS) -o ck_spinlock.THROUGHPUT ck_spinlock.c -lm
ck_spinlock.LATENCY: ck_spinlock.c
$(CC) -DLATENCY $(CFLAGS) -o ck_spinlock.LATENCY ck_spinlock.c
$(CC) -DLATENCY $(CFLAGS) -o ck_spinlock.LATENCY ck_spinlock.c -lm
ck_ticket.THROUGHPUT: ck_ticket.c
$(CC) -DTHROUGHPUT $(CFLAGS) -o ck_ticket.THROUGHPUT ck_ticket.c
$(CC) -DTHROUGHPUT $(CFLAGS) -o ck_ticket.THROUGHPUT ck_ticket.c -lm
ck_ticket.LATENCY: ck_ticket.c
$(CC) -DLATENCY $(CFLAGS) -o ck_ticket.LATENCY ck_ticket.c
$(CC) -DLATENCY $(CFLAGS) -o ck_ticket.LATENCY ck_ticket.c -lm
ck_mcs.THROUGHPUT: ck_mcs.c
$(CC) -DTHROUGHPUT $(CFLAGS) -o ck_mcs.THROUGHPUT ck_mcs.c
$(CC) -DTHROUGHPUT $(CFLAGS) -o ck_mcs.THROUGHPUT ck_mcs.c -lm
ck_mcs.LATENCY: ck_mcs.c
$(CC) -DLATENCY $(CFLAGS) -o ck_mcs.LATENCY ck_mcs.c
$(CC) -DLATENCY $(CFLAGS) -o ck_mcs.LATENCY ck_mcs.c -lm
ck_dec.THROUGHPUT: ck_dec.c
$(CC) -DTHROUGHPUT $(CFLAGS) -o ck_dec.THROUGHPUT ck_dec.c
$(CC) -DTHROUGHPUT $(CFLAGS) -o ck_dec.THROUGHPUT ck_dec.c -lm
ck_dec.LATENCY: ck_dec.c
$(CC) -DLATENCY $(CFLAGS) -o ck_dec.LATENCY ck_dec.c
$(CC) -DLATENCY $(CFLAGS) -o ck_dec.LATENCY ck_dec.c -lm
ck_cas.THROUGHPUT: ck_cas.c
$(CC) -DTHROUGHPUT $(CFLAGS) -o ck_cas.THROUGHPUT ck_cas.c
$(CC) -DTHROUGHPUT $(CFLAGS) -o ck_cas.THROUGHPUT ck_cas.c -lm
ck_cas.LATENCY: ck_cas.c
$(CC) -DLATENCY $(CFLAGS) -o ck_cas.LATENCY ck_cas.c
$(CC) -DLATENCY $(CFLAGS) -o ck_cas.LATENCY ck_cas.c -lm
ck_fas.THROUGHPUT: ck_fas.c
$(CC) -DTHROUGHPUT $(CFLAGS) -o ck_fas.THROUGHPUT ck_fas.c
$(CC) -DTHROUGHPUT $(CFLAGS) -o ck_fas.THROUGHPUT ck_fas.c -lm
ck_fas.LATENCY: ck_fas.c
$(CC) -DLATENCY $(CFLAGS) -o ck_fas.LATENCY ck_fas.c
$(CC) -DLATENCY $(CFLAGS) -o ck_fas.LATENCY ck_fas.c -lm
ck_clh.THROUGHPUT: ck_clh.c
$(CC) -DTHROUGHPUT $(CFLAGS) -o ck_clh.THROUGHPUT ck_clh.c
$(CC) -DTHROUGHPUT $(CFLAGS) -o ck_clh.THROUGHPUT ck_clh.c -lm
ck_clh.LATENCY: ck_clh.c
$(CC) -DLATENCY $(CFLAGS) -o ck_clh.LATENCY ck_clh.c
$(CC) -DLATENCY $(CFLAGS) -o ck_clh.LATENCY ck_clh.c -lm
linux_spinlock.THROUGHPUT: linux_spinlock.c
$(CC) -DTHROUGHPUT $(CFLAGS) -o linux_spinlock.THROUGHPUT linux_spinlock.c
$(CC) -DTHROUGHPUT $(CFLAGS) -o linux_spinlock.THROUGHPUT linux_spinlock.c -lm
linux_spinlock.LATENCY: linux_spinlock.c
$(CC) -DLATENCY $(CFLAGS) -o linux_spinlock.LATENCY linux_spinlock.c
$(CC) -DLATENCY $(CFLAGS) -o linux_spinlock.LATENCY linux_spinlock.c -lm
ck_ticket_pb.THROUGHPUT: ck_ticket_pb.c
$(CC) -DTHROUGHPUT $(CFLAGS) -o ck_ticket_pb.THROUGHPUT ck_ticket_pb.c
$(CC) -DTHROUGHPUT $(CFLAGS) -o ck_ticket_pb.THROUGHPUT ck_ticket_pb.c -lm
ck_ticket_pb.LATENCY: ck_ticket_pb.c
$(CC) -DLATENCY $(CFLAGS) -o ck_ticket_pb.LATENCY ck_ticket_pb.c
$(CC) -DLATENCY $(CFLAGS) -o ck_ticket_pb.LATENCY ck_ticket_pb.c -lm
ck_anderson.THROUGHPUT: ck_anderson.c
$(CC) -DTHROUGHPUT $(CFLAGS) -o ck_anderson.THROUGHPUT ck_anderson.c
$(CC) -DTHROUGHPUT $(CFLAGS) -o ck_anderson.THROUGHPUT ck_anderson.c -lm
ck_anderson.LATENCY: ck_anderson.c
$(CC) -DLATENCY $(CFLAGS) -o ck_anderson.LATENCY ck_anderson.c
$(CC) -DLATENCY $(CFLAGS) -o ck_anderson.LATENCY ck_anderson.c -lm
clean:
rm -rf *.dSYM *.exe $(OBJECTS)
include ../../../build/regressions.build
CFLAGS+=$(PTHREAD_CFLAGS) -D_GNU_SOURCE -lm
CFLAGS+=$(PTHREAD_CFLAGS) -D_GNU_SOURCE
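The -lm change in both Makefiles has the same rationale: a library listed in CFLAGS lands before the source files on the compile-and-link command line, and linkers that resolve symbols left to right may then drop it, so the flag moves to the end of each link command instead. The resulting rule shape, with a hypothetical target name:

ck_example.THROUGHPUT: ck_example.c
	$(CC) -DTHROUGHPUT $(CFLAGS) -o ck_example.THROUGHPUT ck_example.c -lm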
