From 44b769963fc0ce724e770e5fb3f52f38307c684b Mon Sep 17 00:00:00 2001
From: Samy Al Bahra
Date: Sat, 11 May 2013 14:23:27 -0400
Subject: [PATCH] ck_pr: ck_pr_fence_X_Y interface has been added.

ck_pr_fence_{load_load,store_store,load_store,store_load} operations
have been added. In addition, it is no longer the responsibility of
architecture ports to determine when to emit a specific fence: the
underlying port now always emits the instructions necessary to enforce
strict ordering, and the higher-level include/ck_pr implementation
decides whether a fence needs to be emitted at all, according to the
memory model specified by ck_md (CK_MD_{TSO,RMO,PSO}). In other words,
only ck_pr_fence_strict_* is implemented by the MD ck_pr port.
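The intended usage is the classic message-passing pattern. A minimal
sketch, illustrative only (the data/flag variables and function names
below are not part of this patch):

    #include <ck_pr.h>

    static unsigned int data;
    static unsigned int flag;

    static void
    producer(void)
    {

            ck_pr_store_uint(&data, 42);
            /* Orders the two stores: no-op under CK_MD_TSO,
             * a real barrier under CK_MD_RMO and CK_MD_PSO. */
            ck_pr_fence_store_store();
            ck_pr_store_uint(&flag, 1);
    }

    static void
    consumer(void)
    {

            while (ck_pr_load_uint(&flag) == 0)
                    ck_pr_stall();
            /* Orders the two loads: no-op under CK_MD_TSO and
             * CK_MD_PSO, a real barrier under CK_MD_RMO. */
            ck_pr_fence_load_load();
            (void)ck_pr_load_uint(&data);
    }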
---
 include/ck_pr.h             | 61 +++++++++++++++++++++++++++++++++++++
 include/gcc/ppc/ck_pr.h     | 33 ++++++--------------
 include/gcc/ppc64/ck_pr.h   | 33 ++++++--------------
 include/gcc/sparcv9/ck_pr.h | 39 ++++++------------------
 include/gcc/x86/ck_pr.h     | 29 +++--------------
 include/gcc/x86_64/ck_pr.h  | 29 +++--------------
 6 files changed, 99 insertions(+), 125 deletions(-)

diff --git a/include/ck_pr.h b/include/ck_pr.h
index 35540b3..10839a3 100644
--- a/include/ck_pr.h
+++ b/include/ck_pr.h
@@ -30,6 +30,7 @@
 
 #include <ck_cc.h>
 #include <ck_limits.h>
+#include <ck_md.h>
 #include <ck_stdint.h>
 #include <stdbool.h>
 
@@ -49,6 +50,66 @@
 #error Your platform is unsupported
 #endif
 
+#define CK_PR_FENCE_EMIT(T)			\
+	CK_CC_INLINE static void		\
+	ck_pr_fence_##T(void)			\
+	{					\
+		ck_pr_fence_strict_##T();	\
+	}
+#define CK_PR_FENCE_NOOP(T)			\
+	CK_CC_INLINE static void		\
+	ck_pr_fence_##T(void)			\
+	{					\
+		return;				\
+	}
+
+/*
+ * None of the currently supported platforms allow for data-dependent
+ * load ordering.
+ */
+CK_PR_FENCE_NOOP(load_depends)
+
+#if defined(CK_MD_RMO)
+/*
+ * Only stores to the same location have a global
+ * ordering.
+ */
+CK_PR_FENCE_EMIT(load_load)
+CK_PR_FENCE_EMIT(load_store)
+CK_PR_FENCE_EMIT(store_store)
+CK_PR_FENCE_EMIT(store_load)
+CK_PR_FENCE_EMIT(load)
+CK_PR_FENCE_EMIT(store)
+CK_PR_FENCE_EMIT(memory)
+#elif defined(CK_MD_PSO)
+/*
+ * Anything can be re-ordered with respect to stores.
+ * Otherwise, loads are executed in-order.
+ */
+CK_PR_FENCE_NOOP(load_load)
+CK_PR_FENCE_EMIT(load_store)
+CK_PR_FENCE_EMIT(store_store)
+CK_PR_FENCE_EMIT(store_load)
+CK_PR_FENCE_NOOP(load)
+CK_PR_FENCE_EMIT(store)
+CK_PR_FENCE_EMIT(memory)
+#elif defined(CK_MD_TSO)
+/*
+ * Only loads are re-ordered and only with respect to
+ * prior stores.
+ */
+CK_PR_FENCE_NOOP(load_load)
+CK_PR_FENCE_NOOP(load_store)
+CK_PR_FENCE_NOOP(store_store)
+CK_PR_FENCE_EMIT(store_load)
+CK_PR_FENCE_NOOP(load)
+CK_PR_FENCE_NOOP(store)
+CK_PR_FENCE_NOOP(memory)
+#endif /* CK_MD_TSO */
+
+#undef CK_PR_FENCE_EMIT
+#undef CK_PR_FENCE_NOOP
+
 #define CK_PR_BIN(K, S, M, T, P, C)			\
 	CK_CC_INLINE static void			\
 	ck_pr_##K##_##S(M *target, T value)		\
diff --git a/include/gcc/ppc/ck_pr.h b/include/gcc/ppc/ck_pr.h
index e1f88a6..8b14772 100644
--- a/include/gcc/ppc/ck_pr.h
+++ b/include/gcc/ppc/ck_pr.h
@@ -55,33 +55,20 @@ ck_pr_stall(void)
 	return;
 }
 
-#if defined(CK_MD_RMO) || defined(CK_MD_PSO)
-#define CK_PR_FENCE(T, I)				\
-	CK_CC_INLINE static void			\
-	ck_pr_fence_strict_##T(void)			\
-	{						\
-		__asm__ __volatile__(I ::: "memory");	\
-	}						\
-	CK_CC_INLINE static void ck_pr_fence_##T(void)	\
-	{						\
-		__asm__ __volatile__(I ::: "memory");	\
-	}
-#else
-#define CK_PR_FENCE(T, I)				\
-	CK_CC_INLINE static void			\
-	ck_pr_fence_strict_##T(void)			\
-	{						\
-		__asm__ __volatile__(I ::: "memory");	\
-	}						\
-	CK_CC_INLINE static void ck_pr_fence_##T(void)	\
-	{						\
-		__asm__ __volatile__("" ::: "memory");	\
-	}
-#endif /* !CK_MD_RMO && !CK_MD_PSO */
+#define CK_PR_FENCE(T, I)				\
+	CK_CC_INLINE static void			\
+	ck_pr_fence_strict_##T(void)			\
+	{						\
+		__asm__ __volatile__(I ::: "memory");	\
+	}
 
 CK_PR_FENCE(load_depends, "")
 CK_PR_FENCE(store, "lwsync")
+CK_PR_FENCE(store_store, "lwsync")
+CK_PR_FENCE(store_load, "sync")
 CK_PR_FENCE(load, "lwsync")
+CK_PR_FENCE(load_load, "lwsync")
+CK_PR_FENCE(load_store, "lwsync")
 CK_PR_FENCE(memory, "sync")
 
 #undef CK_PR_FENCE
diff --git a/include/gcc/ppc64/ck_pr.h b/include/gcc/ppc64/ck_pr.h
index 62aeb7a..aebd2c9 100644
--- a/include/gcc/ppc64/ck_pr.h
+++ b/include/gcc/ppc64/ck_pr.h
@@ -54,29 +54,12 @@ ck_pr_stall(void)
 	return;
 }
 
-#if defined(CK_MD_RMO) || defined(CK_MD_PSO)
-#define CK_PR_FENCE(T, I)				\
-	CK_CC_INLINE static void			\
-	ck_pr_fence_strict_##T(void)			\
-	{						\
-		__asm__ __volatile__(I ::: "memory");	\
-	}						\
-	CK_CC_INLINE static void ck_pr_fence_##T(void)	\
-	{						\
-		__asm__ __volatile__(I ::: "memory");	\
-	}
-#else
-#define CK_PR_FENCE(T, I)				\
-	CK_CC_INLINE static void			\
-	ck_pr_fence_strict_##T(void)			\
-	{						\
-		__asm__ __volatile__(I ::: "memory");	\
-	}						\
-	CK_CC_INLINE static void ck_pr_fence_##T(void)	\
-	{						\
-		__asm__ __volatile__("" ::: "memory");	\
-	}
-#endif /* !CK_MD_RMO && !CK_MD_PSO */
+#define CK_PR_FENCE(T, I)				\
+	CK_CC_INLINE static void			\
+	ck_pr_fence_strict_##T(void)			\
+	{						\
+		__asm__ __volatile__(I ::: "memory");	\
+	}
 
 /*
  * These are derived from:
 *     http://www.ibm.com/developerworks/systems/articles/powerpc.html
@@ -84,7 +67,11 @@ ck_pr_stall(void)
  */
 CK_PR_FENCE(load_depends, "")
 CK_PR_FENCE(store, "lwsync")
+CK_PR_FENCE(store_store, "lwsync")
+CK_PR_FENCE(store_load, "sync")
 CK_PR_FENCE(load, "lwsync")
+CK_PR_FENCE(load_load, "lwsync")
+CK_PR_FENCE(load_store, "lwsync")
 CK_PR_FENCE(memory, "sync")
 
 #undef CK_PR_FENCE
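On ppc and ppc64, lwsync covers load/load, load/store, and store/store
ordering, while store/load requires the full sync instruction. To see
how the port and generic layers compose, here is a rough hand expansion
of ck_pr_fence_store_load (the one fence emitted under every memory
model); this is an illustration, not code from the patch:

    /* MD port layer: the strict fence always emits the instruction. */
    CK_CC_INLINE static void
    ck_pr_fence_strict_store_load(void)
    {

            __asm__ __volatile__("sync" ::: "memory");
    }

    /* Generic layer: CK_PR_FENCE_EMIT(store_load) wraps the strict
     * variant, so the non-strict fence is a plain passthrough here. */
    CK_CC_INLINE static void
    ck_pr_fence_store_load(void)
    {

            ck_pr_fence_strict_store_load();
    }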
diff --git a/include/gcc/sparcv9/ck_pr.h b/include/gcc/sparcv9/ck_pr.h
index ba2fc41..fe6991a 100644
--- a/include/gcc/sparcv9/ck_pr.h
+++ b/include/gcc/sparcv9/ck_pr.h
@@ -51,39 +51,20 @@ ck_pr_stall(void)
 	return;
 }
 
-#if defined(CK_MD_RMO) || defined(CK_MD_PSO)
-/*
- * If RMO is forced, then do not assume TSO model.
- */
-#define CK_PR_FENCE(T, I)				\
-	CK_CC_INLINE static void			\
-	ck_pr_fence_strict_##T(void)			\
-	{						\
-		__asm__ __volatile__(I ::: "memory");	\
-	}						\
-	CK_CC_INLINE static void ck_pr_fence_##T(void)	\
-	{						\
-		__asm__ __volatile__(I ::: "memory");	\
-	}
-#else
-/*
- * By default, we will assume TSO model is used on SPARCv9.
- */
-#define CK_PR_FENCE(T, I)				\
-	CK_CC_INLINE static void			\
-	ck_pr_fence_strict_##T(void)			\
-	{						\
-		__asm__ __volatile__(I ::: "memory");	\
-	}						\
-	CK_CC_INLINE static void ck_pr_fence_##T(void)	\
-	{						\
-		__asm__ __volatile__("" ::: "memory");	\
-	}
-#endif /* !CK_MD_RMO && !CK_MD_PSO */
+#define CK_PR_FENCE(T, I)				\
+	CK_CC_INLINE static void			\
+	ck_pr_fence_strict_##T(void)			\
+	{						\
+		__asm__ __volatile__(I ::: "memory");	\
+	}
 
 CK_PR_FENCE(load_depends, "")
 CK_PR_FENCE(store, "membar #StoreStore")
+CK_PR_FENCE(store_store, "membar #StoreStore")
+CK_PR_FENCE(store_load, "membar #StoreLoad")
 CK_PR_FENCE(load, "membar #LoadLoad")
+CK_PR_FENCE(load_load, "membar #LoadLoad")
+CK_PR_FENCE(load_store, "membar #LoadStore")
 CK_PR_FENCE(memory, "membar #LoadLoad | #LoadStore | #StoreStore | #StoreLoad")
 
 #undef CK_PR_FENCE
diff --git a/include/gcc/x86/ck_pr.h b/include/gcc/x86/ck_pr.h
index 38a0485..e0b04c9 100644
--- a/include/gcc/x86/ck_pr.h
+++ b/include/gcc/x86/ck_pr.h
@@ -63,40 +63,19 @@ ck_pr_stall(void)
 	return;
 }
 
-#if defined(CK_MD_RMO) || defined(CK_MD_PSO)
 #define CK_PR_FENCE(T, I)				\
 	CK_CC_INLINE static void			\
 	ck_pr_fence_strict_##T(void)			\
 	{						\
 		__asm__ __volatile__(I ::: "memory");	\
-	}						\
-	CK_CC_INLINE static void ck_pr_fence_##T(void)	\
-	{						\
-		__asm__ __volatile__(I ::: "memory");	\
-	}
-#else
-/*
- * IA32 has strong memory ordering guarantees, so memory
- * fences are enabled if and only if the user specifies that
- * that the program will be using non-temporal instructions.
- * Otherwise, an optimization barrier is used in order to prevent
- * compiler re-ordering of loads and stores across the barrier.
- */
-#define CK_PR_FENCE(T, I)				\
-	CK_CC_INLINE static void			\
-	ck_pr_fence_strict_##T(void)			\
-	{						\
-		__asm__ __volatile__(I ::: "memory");	\
-	}						\
-	CK_CC_INLINE static void ck_pr_fence_##T(void)	\
-	{						\
-		__asm__ __volatile__("" ::: "memory");	\
 	}
-#endif /* !CK_MD_RMO && !CK_MD_PSO */
 
 CK_PR_FENCE(load, "lfence")
-CK_PR_FENCE(load_depends, "")
+CK_PR_FENCE(load_load, "lfence")
+CK_PR_FENCE(load_store, "mfence")
 CK_PR_FENCE(store, "sfence")
+CK_PR_FENCE(store_store, "sfence")
+CK_PR_FENCE(store_load, "mfence")
 CK_PR_FENCE(memory, "mfence")
 
 #undef CK_PR_FENCE
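The removed comment above still describes why the strict variants
remain available: on IA32, fence instructions chiefly matter for code
using non-temporal stores. A hedged sketch of that use case, where
nt_copy is a hypothetical movnti-based routine and not CK API:

    extern void nt_copy(void *dst, const void *src, unsigned long n);

    static void
    publish(void *dst, const void *src, unsigned long n,
        unsigned int *flag)
    {

            nt_copy(dst, src, n);
            /* Always emits sfence, regardless of CK_MD_*, draining
             * the non-temporal stores before the flag is set. */
            ck_pr_fence_strict_store();
            ck_pr_store_uint(flag, 1);
    }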
diff --git a/include/gcc/x86_64/ck_pr.h b/include/gcc/x86_64/ck_pr.h
index 84e893b..004f5e5 100644
--- a/include/gcc/x86_64/ck_pr.h
+++ b/include/gcc/x86_64/ck_pr.h
@@ -62,40 +62,19 @@ ck_pr_stall(void)
 	return;
 }
 
-#if defined(CK_MD_RMO) || defined(CK_MD_PSO)
 #define CK_PR_FENCE(T, I)				\
 	CK_CC_INLINE static void			\
 	ck_pr_fence_strict_##T(void)			\
 	{						\
 		__asm__ __volatile__(I ::: "memory");	\
-	}						\
-	CK_CC_INLINE static void ck_pr_fence_##T(void)	\
-	{						\
-		__asm__ __volatile__(I ::: "memory");	\
-	}
-#else
-/*
- * IA32 has strong memory ordering guarantees, so memory
- * fences are enabled if and only if the user specifies that
- * that the program will be using non-temporal instructions.
- * Otherwise, an optimization barrier is used in order to prevent
- * compiler re-ordering of loads and stores across the barrier.
- */
-#define CK_PR_FENCE(T, I)				\
-	CK_CC_INLINE static void			\
-	ck_pr_fence_strict_##T(void)			\
-	{						\
-		__asm__ __volatile__(I ::: "memory");	\
-	}						\
-	CK_CC_INLINE static void ck_pr_fence_##T(void)	\
-	{						\
-		__asm__ __volatile__("" ::: "memory");	\
 	}
-#endif /* !CK_MD_RMO && !CK_MD_PSO */
 
 CK_PR_FENCE(load, "lfence")
-CK_PR_FENCE(load_depends, "")
+CK_PR_FENCE(load_load, "lfence")
+CK_PR_FENCE(load_store, "mfence")
 CK_PR_FENCE(store, "sfence")
+CK_PR_FENCE(store_store, "sfence")
+CK_PR_FENCE(store_load, "mfence")
 CK_PR_FENCE(memory, "mfence")
 
 #undef CK_PR_FENCE
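One closing note on the TSO mapping: ck_pr_fence_store_load stays a
real fence on every architecture, matching the one reordering x86's
TSO model permits, namely a store followed by a load from a different
location. A Dekker-style sketch (flag and try_enter are illustrative,
not CK API):

    #include <ck_pr.h>
    #include <stdbool.h>

    static unsigned int flag[2];

    static bool
    try_enter(unsigned int self)
    {

            ck_pr_store_uint(&flag[self], 1);
            /* Emits mfence even under CK_MD_TSO; without it, the load
             * below could be satisfied before the store is visible. */
            ck_pr_fence_store_load();
            return ck_pr_load_uint(&flag[1 - self]) == 0;
    }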