diff --git a/include/ck_pr.h b/include/ck_pr.h
index 2059dd6..3876e1f 100644
--- a/include/ck_pr.h
+++ b/include/ck_pr.h
@@ -35,6 +35,8 @@
 #if defined(__x86_64__)
 #include "gcc/x86_64/ck_pr.h"
+#elif defined(__x86__)
+#include "gcc/x86/ck_pr.h"
 #elif defined(__sparcv9__)
 #include "gcc/sparcv9/ck_pr.h"
 #elif defined(__GNUC__)
 #include "gcc/ck_pr.h"
diff --git a/include/gcc/x86/ck_f_pr.h b/include/gcc/x86/ck_f_pr.h
index 8894c74..0f0ce4b 100644
--- a/include/gcc/x86/ck_f_pr.h
+++ b/include/gcc/x86/ck_f_pr.h
@@ -37,8 +37,6 @@
 #define CK_F_PR_CAS_32_2_VALUE
 #define CK_F_PR_CAS_32_VALUE
 #define CK_F_PR_CAS_64
-#define CK_F_PR_CAS_64_1
-#define CK_F_PR_CAS_64_1_VALUE
 #define CK_F_PR_CAS_64_VALUE
 #define CK_F_PR_CAS_8
 #define CK_F_PR_CAS_8_8
@@ -88,14 +86,6 @@
 #define CK_F_PR_FAS_INT
 #define CK_F_PR_FAS_PTR
 #define CK_F_PR_FAS_UINT
-#define CK_F_PR_FENCE_LOAD
-#define CK_F_PR_FENCE_LOAD_DEPENDS
-#define CK_F_PR_FENCE_MEMORY
-#define CK_F_PR_FENCE_STORE
-#define CK_F_PR_FENCE_STRICT_LOAD
-#define CK_F_PR_FENCE_STRICT_LOAD_DEPENDS
-#define CK_F_PR_FENCE_STRICT_MEMORY
-#define CK_F_PR_FENCE_STRICT_STORE
 #define CK_F_PR_INC_16
 #define CK_F_PR_INC_16_ZERO
 #define CK_F_PR_INC_32
@@ -152,9 +142,9 @@
 #define CK_F_PR_OR_INT
 #define CK_F_PR_OR_PTR
 #define CK_F_PR_OR_UINT
-#define CK_F_PR_STALL
 #define CK_F_PR_STORE_16
 #define CK_F_PR_STORE_32
+#define CK_F_PR_STORE_64
 #define CK_F_PR_STORE_8
 #define CK_F_PR_STORE_CHAR
 #define CK_F_PR_STORE_INT
diff --git a/include/gcc/x86/ck_pr.h b/include/gcc/x86/ck_pr.h
index eb8df18..2bb0dd7 100644
--- a/include/gcc/x86/ck_pr.h
+++ b/include/gcc/x86/ck_pr.h
@@ -52,42 +52,6 @@
 #define CK_PR_LOCK_PREFIX "lock "
 #endif
 
-/*
- * Prevent speculative execution in busy-wait loops (P4 <=)
- * or "predefined delay".
- */
-CK_CC_INLINE static void
-ck_pr_stall(void)
-{
-	__asm__ __volatile__("pause" ::: "memory");
-	return;
-}
-
-/*
- * IA32 has strong memory ordering guarantees, so memory
- * fences are enabled if and only if the user specifies that
- * that the program will be using non-temporal instructions.
- * Otherwise, an optimization barrier is used in order to prevent
- * compiler re-ordering of loads and stores across the barrier.
- */
-#define CK_PR_FENCE(T, I)				\
-	CK_CC_INLINE static void			\
-	ck_pr_fence_strict_##T(void)			\
-	{						\
-		__asm__ __volatile__(I ::: "memory");	\
-	}						\
-	CK_CC_INLINE static void ck_pr_fence_##T(void)	\
-	{						\
-		__asm__ __volatile__("" ::: "memory");	\
-	}
-
-CK_PR_FENCE(load, "lfence")
-CK_PR_FENCE(load_depends, "")
-CK_PR_FENCE(store, "sfence")
-CK_PR_FENCE(memory, "mfence")
-
-#undef CK_PR_FENCE
-
 /*
  * Atomic fetch-and-store operations.
  */
@@ -553,12 +517,9 @@ ck_pr_cas_##S##_##W##_value(T *t, T c[W], T s[W], T *v)	\
 CK_PR_CAS_V(char, 8, char)
 CK_PR_CAS_V(int, 2, int)
 CK_PR_CAS_V(uint, 2, unsigned int)
-CK_PR_CAS_V(64, 1, uint64_t)
 CK_PR_CAS_V(16, 4, uint16_t)
 CK_PR_CAS_V(8, 8, uint8_t)
 
-#define ck_pr_cas_64_value(A, B, C, D) ck_pr_cas_64_1_value((A), &(B), &(C), (D))
-
 #undef CK_PR_CAS_V
 
 /*
diff --git a/regressions/common.h b/regressions/common.h
index d316580..669fe30 100644
--- a/regressions/common.h
+++ b/regressions/common.h
@@ -78,7 +78,7 @@ aff_iterate(struct affinity *acb CK_CC_UNUSED)
 CK_CC_INLINE static uint64_t
 rdtsc(void)
 {
-#if defined(__x86_64__)
+#if defined(__x86__) || defined(__x86_64__)
 	uint32_t eax = 0, edx;
 
 	__asm__ __volatile__("cpuid;"
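
Usage sketch (illustrative, not part of the patch): the 64-bit compare-and-swap
that this change reroutes away from the cas_64_1 wrapper keeps ck_pr's usual
value-form convention, as visible in the removed macro: a target pointer, the
expected value, the new value, and a pointer that receives the value actually
observed at the target. A retry loop built on it could look like the following;
counter_inc and its argument are hypothetical names, and the initial snapshot
is deliberately a guess that the first failed CAS corrects.

#include <stdint.h>
#include <stdbool.h>
#include <ck_pr.h>

/*
 * Sketch: atomically increment a 64-bit counter using the
 * value-returning CAS. On failure, "snapshot" is refreshed with
 * the value observed at the target, so the loop needs no separate
 * reload on each iteration.
 */
CK_CC_INLINE static void
counter_inc(uint64_t *counter)
{
	uint64_t snapshot = 0;

	while (ck_pr_cas_64_value(counter, snapshot, snapshot + 1, &snapshot) == false)
		continue;

	return;
}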