diff --git a/include/gcc/x86/ck_pr.h b/include/gcc/x86/ck_pr.h index 8cac729..1bbbd82 100644 --- a/include/gcc/x86/ck_pr.h +++ b/include/gcc/x86/ck_pr.h @@ -164,16 +164,26 @@ CK_PR_LOAD_S(8, uint8_t, "movb") CK_CC_INLINE static void ck_pr_load_32_2(uint32_t target[2], uint32_t v[2]) { +#ifdef __PIC__ __asm__ __volatile__("pushl %%ebx;" "movl %%edx, %%ecx;" "movl %%eax, %%ebx;" - CK_PR_LOCK_PREFIX "cmpxchg8b %0;" + CK_PR_LOCK_PREFIX "cmpxchg8b %a2;" "popl %%ebx;" + : "=a" (v[0]), + "=d" (v[1]) + : "p" (&target[0]) + : "%ecx", "memory", "cc"); +#else + __asm__ __volatile__("movl %%edx, %%ecx;" + "movl %%eax, %%ebx;" + CK_PR_LOCK_PREFIX "cmpxchg8b %0;" : "+m" (*(uint32_t *)target), "=a" (v[0]), "=d" (v[1]) : - : "ecx", "memory", "cc"); + : "%ebx", "%ecx", "memory", "cc"); +#endif return; } @@ -424,42 +434,11 @@ CK_PR_CAS_O_S(8, uint8_t, "b", "al") #undef CK_PR_CAS_O_S #undef CK_PR_CAS_O -/* - * Contrary to C-interface, alignment requirements are that of uint32_t[2]. - */ -CK_CC_INLINE static bool -ck_pr_cas_32_2(uint32_t target[2], uint32_t compare[2], uint32_t set[2]) -{ - bool z; - - __asm__ __volatile__("pushl %%ebx;" - "pushl %%ecx;" - "movl 0(%2), %%ebx;" - "movl 4(%2), %%ecx;" - "movl %3, %%eax;" - "leal %3, %%edx;" - "movl 4(%%edx), %%edx;" - CK_PR_LOCK_PREFIX "cmpxchg8b %0; setz %1;" - "popl %%ecx;" - "popl %%ebx;" - : "+m" (*target), - "=m" (z) - : "q" (set), - "m" (compare) - : "memory", "cc", "%eax", "%edx"); - return (bool)z; -} - -CK_CC_INLINE static bool -ck_pr_cas_ptr_2(void *t, void *c, void *s) -{ - return ck_pr_cas_32_2(t, c, s); -} - CK_CC_INLINE static bool ck_pr_cas_64(uint64_t *t, uint64_t c, uint64_t s) { bool z; + union { uint64_t s; uint32_t v[2]; @@ -470,22 +449,31 @@ ck_pr_cas_64(uint64_t *t, uint64_t c, uint64_t s) uint32_t v[2]; } comp; - set.s = s; - comp.c = c; + ck_pr_store_64(&set.s, s); + ck_pr_store_64(&comp.c, c); +#ifdef __PIC__ __asm__ __volatile__("pushl %%ebx;" - "pushl %%ecx;" - "movl 0(%4), %%ebx;" - "movl 4(%4), %%ecx;" + "movl %5, %%ebx;" CK_PR_LOCK_PREFIX "cmpxchg8b %0; setz %1;" - "popl %%ecx;" "popl %%ebx;" + : "+m" (*t), + "=adc" (z) + : "a" (comp.v[0]), + "d" (comp.v[1]), + "c" (set.v[1]), + "m" (set.v[0]) + : "memory", "cc"); +#else + __asm__ __volatile__(CK_PR_LOCK_PREFIX "cmpxchg8b %0; setz %1;" : "+m" (*t), "=q" (z) : "a" (comp.v[0]), "d" (comp.v[1]), - "q" (set.v) + "b" (set.v[0]), + "c" (set.v[1]) : "memory", "cc"); +#endif return (bool)z; } @@ -506,6 +494,7 @@ ck_pr_cas_64_value(uint64_t *t, uint64_t c, uint64_t s, uint64_t *v) set.s = s; comp.c = c; +#ifdef __PIC__ /* * Note the setz being done in memory. This is because if we allow * gcc to pick a register, it seems to want to pick BL, which is @@ -514,43 +503,103 @@ ck_pr_cas_64_value(uint64_t *t, uint64_t c, uint64_t s, uint64_t *v) * this. This also affects ck_pr_cas_32_2_value. */ __asm__ __volatile__("pushl %%ebx;" - "pushl %%ecx;" - "movl 0(%6), %%ebx;" - "movl 4(%6), %%ecx;" - CK_PR_LOCK_PREFIX "cmpxchg8b %0; setz %3;" - "popl %%ecx;" + "movl %7, %%ebx;" + CK_PR_LOCK_PREFIX "cmpxchg8b %a3; setz %2;" "popl %%ebx;" + : "=a" (val[0]), + "=d" (val[1]), + "=q" (z) + : "p" (t), + "a" (comp.v[0]), + "d" (comp.v[1]), + "c" (set.v[1]), + "m" (set.v[0]) + : "memory", "cc"); +#else + __asm__ __volatile__(CK_PR_LOCK_PREFIX "cmpxchg8b %0; setz %3;" : "+m" (*t), "=a" (val[0]), "=d" (val[1]), - "=m" (z) + "=q" (z) : "a" (comp.v[0]), "d" (comp.v[1]), - "q" (set.v) + "b" (set.v[0]), + "c" (set.v[1]) + : "memory", "cc"); + +#endif + return (bool)z; +} + +CK_CC_INLINE static bool +ck_pr_cas_32_2(uint32_t t[2], uint32_t c[2], uint32_t s[2]) +{ + bool z; + +#ifdef __PIC__ + __asm__ __volatile__("pushl %%ebx;" + "movl %5, %%ebx;" + CK_PR_LOCK_PREFIX "cmpxchg8b %a1; setz %0;" + "popl %%ebx;" + : "=q" (z) + : "p" (&t[0]), + "a" (c[0]), + "d" (c[1]), + "c" (s[1]), + "m" (s[0]) + : "memory", "cc"); +#else + __asm__ __volatile__(CK_PR_LOCK_PREFIX "cmpxchg8b %0; setz %1;" + : "+m" (*t), + "=q" (z) + : "a" (c[0]), + "d" (c[1]), + "b" (s[0]), + "c" (s[1]) : "memory", "cc"); +#endif + return (bool)z; } +CK_CC_INLINE static bool +ck_pr_cas_ptr_2(void *t, void *c, void *s) +{ + return ck_pr_cas_32_2(t, c, s); +} + + CK_CC_INLINE static bool ck_pr_cas_32_2_value(uint32_t target[2], uint32_t compare[2], uint32_t set[2], uint32_t v[2]) { bool z; +#ifdef __PIC__ __asm__ __volatile__("pushl %%ebx;" - "pushl %%ecx;" - "movl 0(%4), %%ebx;" - "movl 4(%4), %%ecx;" - CK_PR_LOCK_PREFIX "cmpxchg8b %0; setz %3;" - "popl %%ecx;" + "movl %7, %%ebx;" + CK_PR_LOCK_PREFIX "cmpxchg8b %a4; setz %2;" "popl %%ebx;" + : "=a" (v[0]), + "=d" (v[1]), + "=q" (z) + : "p" (target), + "a" (compare[0]), + "d" (compare[1]), + "c" (set[1]), + "m" (set[0]) + : "memory", "cc"); +#else + __asm__ __volatile__(CK_PR_LOCK_PREFIX "cmpxchg8b %0; setz %3;" : "+m" (*target), "=a" (v[0]), "=d" (v[1]), - "=m" (z) + "=q" (z) : "a" (compare[0]), "d" (compare[1]), - "q" (set) + "b" (set[0]), + "c" (set[1]) : "memory", "cc"); +#endif return (bool)z; } @@ -610,6 +659,8 @@ CK_PR_CAS_V(8, 8, uint8_t) CK_PR_BT_S(K, 32, uint32_t, #K "l %2, %0") \ CK_PR_BT_S(K, 16, uint16_t, #K "w %w2, %0") +/* TODO: GCC's intrinsic atomics for btc and bts don't work for 64-bit. */ + CK_PR_GENERATE(btc) CK_PR_GENERATE(bts) CK_PR_GENERATE(btr)