ck_pr: ck_pr_fence_X_Y interface has been added.

ck_pr_fence_{load_load,store_store,load_store,store_load} operations
have been added. In addition, it is no longer the responsibility of
architecture ports to decide when a specific fence should be emitted.
Instead, the underlying port always emits the instructions necessary to
enforce strict ordering, and the higher-level include/ck_pr
implementation decides whether a fence needs to be emitted at all,
according to the memory model specified by ck_md (CK_MD_{TSO,RMO,PSO}).

In other words, only ck_pr_fence_strict_* is implemented by the
machine-dependent (MD) ck_pr port; the relaxed ck_pr_fence_* wrappers
live in the machine-independent header.
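A minimal usage sketch of the new interface, assuming a default
CK_MD_TSO target such as x86-64; the flag array, the try_enter helper
and the Dekker-style handshake are illustrative only and are not part
of this change:

#include <stdbool.h>
#include <ck_pr.h>

static unsigned int flag[2]; /* hypothetical per-thread intent flags */

/* Dekker-style entry attempt for thread `self` (0 or 1). */
static bool
try_enter(unsigned int self)
{

        ck_pr_store_uint(&flag[self], 1);

        /*
         * Store-load ordering is required between publishing our own
         * flag and reading the peer's. This fence is emitted on every
         * memory model (TSO, PSO and RMO), since even TSO allows a
         * load to be re-ordered with a prior store.
         */
        ck_pr_fence_store_load();

        return ck_pr_load_uint(&flag[1 - self]) == 0;
}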
Samy Al Bahra 12 years ago
parent ca35d3bb75
commit 44b769963f

@@ -30,6 +30,7 @@
 #include <ck_cc.h>
 #include <ck_limits.h>
+#include <ck_md.h>
 #include <ck_stdint.h>
 #include <stdbool.h>
@@ -49,6 +50,66 @@
 #error Your platform is unsupported
 #endif
 
+#define CK_PR_FENCE_EMIT(T)                     \
+        CK_CC_INLINE static void                \
+        ck_pr_fence_##T(void)                   \
+        {                                       \
+                ck_pr_fence_strict_##T();       \
+        }
+#define CK_PR_FENCE_NOOP(T)                     \
+        CK_CC_INLINE static void                \
+        ck_pr_fence_##T(void)                   \
+        {                                       \
+                return;                         \
+        }
+
+/*
+ * None of the currently supported platforms allow for data-dependent
+ * load ordering.
+ */
+CK_PR_FENCE_NOOP(load_depends)
+
+#if defined(CK_MD_RMO)
+/*
+ * Only stores to the same location have a global
+ * ordering.
+ */
+CK_PR_FENCE_EMIT(load_load)
+CK_PR_FENCE_EMIT(load_store)
+CK_PR_FENCE_EMIT(store_store)
+CK_PR_FENCE_EMIT(store_load)
+CK_PR_FENCE_EMIT(load)
+CK_PR_FENCE_EMIT(store)
+CK_PR_FENCE_EMIT(memory)
+#elif defined(CK_MD_PSO)
+/*
+ * Anything can be re-ordered with respect to stores.
+ * Otherwise, loads are executed in-order.
+ */
+CK_PR_FENCE_NOOP(load_load)
+CK_PR_FENCE_EMIT(load_store)
+CK_PR_FENCE_EMIT(store_store)
+CK_PR_FENCE_EMIT(store_load)
+CK_PR_FENCE_NOOP(load)
+CK_PR_FENCE_EMIT(store)
+CK_PR_FENCE_EMIT(memory)
+#elif defined(CK_MD_TSO)
+/*
+ * Only loads are re-ordered and only with respect to
+ * prior stores.
+ */
+CK_PR_FENCE_NOOP(load_load)
+CK_PR_FENCE_NOOP(load_store)
+CK_PR_FENCE_NOOP(store_store)
+CK_PR_FENCE_EMIT(store_load)
+CK_PR_FENCE_NOOP(load)
+CK_PR_FENCE_NOOP(store)
+CK_PR_FENCE_NOOP(memory)
+#endif /* CK_MD_TSO */
+
+#undef CK_PR_FENCE_EMIT
+#undef CK_PR_FENCE_NOOP
+
 #define CK_PR_BIN(K, S, M, T, P, C)             \
         CK_CC_INLINE static void                \
         ck_pr_##K##_##S(M *target, T value)     \
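For reference, this is roughly what the two helper macros above produce
once the memory model has been selected; the hand-expansion below
assumes CK_MD_TSO and is for illustration only:

/* CK_PR_FENCE_EMIT(store_load): defer to the port's strict fence. */
CK_CC_INLINE static void
ck_pr_fence_store_load(void)
{
        ck_pr_fence_strict_store_load();
}

/* CK_PR_FENCE_NOOP(store_store): compiles away entirely under TSO. */
CK_CC_INLINE static void
ck_pr_fence_store_store(void)
{
        return;
}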

@@ -55,33 +55,20 @@ ck_pr_stall(void)
         return;
 }
 
-#if defined(CK_MD_RMO) || defined(CK_MD_PSO)
 #define CK_PR_FENCE(T, I)                               \
         CK_CC_INLINE static void                        \
         ck_pr_fence_strict_##T(void)                    \
         {                                               \
                 __asm__ __volatile__(I ::: "memory");   \
-        }                                               \
-        CK_CC_INLINE static void ck_pr_fence_##T(void)  \
-        {                                               \
-                __asm__ __volatile__(I ::: "memory");   \
         }
-#else
-#define CK_PR_FENCE(T, I)                               \
-        CK_CC_INLINE static void                        \
-        ck_pr_fence_strict_##T(void)                    \
-        {                                               \
-                __asm__ __volatile__(I ::: "memory");   \
-        }                                               \
-        CK_CC_INLINE static void ck_pr_fence_##T(void)  \
-        {                                               \
-                __asm__ __volatile__("" ::: "memory");  \
-        }
-#endif /* !CK_MD_RMO && !CK_MD_PSO */
 
 CK_PR_FENCE(load_depends, "")
 CK_PR_FENCE(store, "lwsync")
+CK_PR_FENCE(store_store, "lwsync")
+CK_PR_FENCE(store_load, "sync")
 CK_PR_FENCE(load, "lwsync")
+CK_PR_FENCE(load_load, "lwsync")
+CK_PR_FENCE(load_store, "lwsync")
 CK_PR_FENCE(memory, "sync")
 
 #undef CK_PR_FENCE
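On PowerPC, store-load is the only ordering lwsync does not provide,
which is why store_load (and the full memory fence) map to sync while
everything else uses the cheaper lwsync. Hand-expanding
CK_PR_FENCE(store_load, "sync") gives roughly the following sketch:

CK_CC_INLINE static void
ck_pr_fence_strict_store_load(void)
{
        /* Full sync: lwsync does not order prior stores against later loads. */
        __asm__ __volatile__("sync" ::: "memory");
}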

@@ -54,29 +54,12 @@ ck_pr_stall(void)
         return;
 }
 
-#if defined(CK_MD_RMO) || defined(CK_MD_PSO)
 #define CK_PR_FENCE(T, I)                               \
         CK_CC_INLINE static void                        \
         ck_pr_fence_strict_##T(void)                    \
         {                                               \
                 __asm__ __volatile__(I ::: "memory");   \
-        }                                               \
-        CK_CC_INLINE static void ck_pr_fence_##T(void)  \
-        {                                               \
-                __asm__ __volatile__(I ::: "memory");   \
         }
-#else
-#define CK_PR_FENCE(T, I)                               \
-        CK_CC_INLINE static void                        \
-        ck_pr_fence_strict_##T(void)                    \
-        {                                               \
-                __asm__ __volatile__(I ::: "memory");   \
-        }                                               \
-        CK_CC_INLINE static void ck_pr_fence_##T(void)  \
-        {                                               \
-                __asm__ __volatile__("" ::: "memory");  \
-        }
-#endif /* !CK_MD_RMO && !CK_MD_PSO */
 
 /*
  * These are derived from:
@@ -84,7 +67,11 @@ ck_pr_stall(void)
  */
 CK_PR_FENCE(load_depends, "")
 CK_PR_FENCE(store, "lwsync")
+CK_PR_FENCE(store_store, "lwsync")
+CK_PR_FENCE(store_load, "sync")
 CK_PR_FENCE(load, "lwsync")
+CK_PR_FENCE(load_load, "lwsync")
+CK_PR_FENCE(load_store, "lwsync")
 CK_PR_FENCE(memory, "sync")
 
 #undef CK_PR_FENCE

@@ -51,39 +51,20 @@ ck_pr_stall(void)
         return;
 }
 
-#if defined(CK_MD_RMO) || defined(CK_MD_PSO)
-/*
- * If RMO is forced, then do not assume TSO model.
- */
 #define CK_PR_FENCE(T, I)                               \
         CK_CC_INLINE static void                        \
         ck_pr_fence_strict_##T(void)                    \
         {                                               \
                 __asm__ __volatile__(I ::: "memory");   \
-        }                                               \
-        CK_CC_INLINE static void ck_pr_fence_##T(void)  \
-        {                                               \
-                __asm__ __volatile__(I ::: "memory");   \
         }
-#else
-/*
- * By default, we will assume TSO model is used on SPARCv9.
- */
-#define CK_PR_FENCE(T, I)                               \
-        CK_CC_INLINE static void                        \
-        ck_pr_fence_strict_##T(void)                    \
-        {                                               \
-                __asm__ __volatile__(I ::: "memory");   \
-        }                                               \
-        CK_CC_INLINE static void ck_pr_fence_##T(void)  \
-        {                                               \
-                __asm__ __volatile__("" ::: "memory");  \
-        }
-#endif /* !CK_MD_RMO && !CK_MD_PSO */
 
 CK_PR_FENCE(load_depends, "")
 CK_PR_FENCE(store, "membar #StoreStore")
+CK_PR_FENCE(store_store, "membar #StoreStore")
+CK_PR_FENCE(store_load, "membar #StoreLoad")
 CK_PR_FENCE(load, "membar #LoadLoad")
+CK_PR_FENCE(load_load, "membar #LoadLoad")
+CK_PR_FENCE(load_store, "membar #LoadStore")
 CK_PR_FENCE(memory, "membar #LoadLoad | #LoadStore | #StoreStore | #StoreLoad")
 
 #undef CK_PR_FENCE
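The SPARCv9 port previously chose between TSO and RMO behaviour itself
(the removed comments in the hunk above); that decision now lives in
ck_md, and the port unconditionally provides the precise membar for
each strict fence. Under CK_MD_RMO, for example, the generic load-store
fence reduces to roughly the following hand-expanded sketch:

CK_CC_INLINE static void
ck_pr_fence_strict_load_store(void)
{
        __asm__ __volatile__("membar #LoadStore" ::: "memory");
}

/* CK_PR_FENCE_EMIT(load_store) under CK_MD_RMO simply calls it. */
CK_CC_INLINE static void
ck_pr_fence_load_store(void)
{
        ck_pr_fence_strict_load_store();
}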

@@ -63,40 +63,19 @@ ck_pr_stall(void)
         return;
 }
 
-#if defined(CK_MD_RMO) || defined(CK_MD_PSO)
 #define CK_PR_FENCE(T, I)                               \
         CK_CC_INLINE static void                        \
         ck_pr_fence_strict_##T(void)                    \
         {                                               \
                 __asm__ __volatile__(I ::: "memory");   \
-        }                                               \
-        CK_CC_INLINE static void ck_pr_fence_##T(void)  \
-        {                                               \
-                __asm__ __volatile__(I ::: "memory");   \
         }
-#else
-/*
- * IA32 has strong memory ordering guarantees, so memory
- * fences are enabled if and only if the user specifies that
- * that the program will be using non-temporal instructions.
- * Otherwise, an optimization barrier is used in order to prevent
- * compiler re-ordering of loads and stores across the barrier.
- */
-#define CK_PR_FENCE(T, I)                               \
-        CK_CC_INLINE static void                        \
-        ck_pr_fence_strict_##T(void)                    \
-        {                                               \
-                __asm__ __volatile__(I ::: "memory");   \
-        }                                               \
-        CK_CC_INLINE static void ck_pr_fence_##T(void)  \
-        {                                               \
-                __asm__ __volatile__("" ::: "memory");  \
-        }
-#endif /* !CK_MD_RMO && !CK_MD_PSO */
 
 CK_PR_FENCE(load, "lfence")
-CK_PR_FENCE(load_depends, "")
+CK_PR_FENCE(load_load, "lfence")
+CK_PR_FENCE(load_store, "mfence")
 CK_PR_FENCE(store, "sfence")
+CK_PR_FENCE(store_store, "sfence")
+CK_PR_FENCE(store_load, "mfence")
 CK_PR_FENCE(memory, "mfence")
 
 #undef CK_PR_FENCE
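Under the default CK_MD_TSO model, only the store-load fence still
costs an instruction on x86; callers that need serialization regardless
of the configured model (for example around the non-temporal stores the
removed comment refers to) can use the strict variants directly. A
hedged illustration of the resulting behaviour, with a made-up
fence_example function:

#include <ck_pr.h>

static void
fence_example(void)
{

        ck_pr_fence_store_store();   /* no-op when CK_MD_TSO is in effect */
        ck_pr_fence_store_load();    /* mfence on every memory model      */
        ck_pr_fence_strict_memory(); /* always mfence on x86              */
        return;
}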

@@ -62,40 +62,19 @@ ck_pr_stall(void)
         return;
 }
 
-#if defined(CK_MD_RMO) || defined(CK_MD_PSO)
 #define CK_PR_FENCE(T, I)                               \
         CK_CC_INLINE static void                        \
         ck_pr_fence_strict_##T(void)                    \
         {                                               \
                 __asm__ __volatile__(I ::: "memory");   \
-        }                                               \
-        CK_CC_INLINE static void ck_pr_fence_##T(void)  \
-        {                                               \
-                __asm__ __volatile__(I ::: "memory");   \
         }
-#else
-/*
- * IA32 has strong memory ordering guarantees, so memory
- * fences are enabled if and only if the user specifies that
- * that the program will be using non-temporal instructions.
- * Otherwise, an optimization barrier is used in order to prevent
- * compiler re-ordering of loads and stores across the barrier.
- */
-#define CK_PR_FENCE(T, I)                               \
-        CK_CC_INLINE static void                        \
-        ck_pr_fence_strict_##T(void)                    \
-        {                                               \
-                __asm__ __volatile__(I ::: "memory");   \
-        }                                               \
-        CK_CC_INLINE static void ck_pr_fence_##T(void)  \
-        {                                               \
-                __asm__ __volatile__("" ::: "memory");  \
-        }
-#endif /* !CK_MD_RMO && !CK_MD_PSO */
 
 CK_PR_FENCE(load, "lfence")
-CK_PR_FENCE(load_depends, "")
+CK_PR_FENCE(load_load, "lfence")
+CK_PR_FENCE(load_store, "mfence")
 CK_PR_FENCE(store, "sfence")
+CK_PR_FENCE(store_store, "sfence")
+CK_PR_FENCE(store_load, "mfence")
 CK_PR_FENCE(memory, "mfence")
 
 #undef CK_PR_FENCE
