x86: Make things friendlier for PIC and non-PIC builds

Making things work properly with PIC on 32-bit x86 architectures is tricky
because of our lack of %ebx. Additionally, GCC versions < 4.3 have some
problems determining what registers may be reused, causing some of the inline
assembly constraints to be a little counterintuitive. (Thanks to Ian Lance
Taylor for the suggestion to get around the reuse issues.)

This change makes us use sane assembler in cases where we're running
non-PIC and use the heavyweight versions only for PIC. There may still be
some issues in this code; for example, it's apparent that 64-bit btc and
bts intrinsic atomics are broken in the version of GCC I'm using, so those
will have to be implemented.

Additionally, the ck_stack tests currently don't work with -fPIC (not sure
if that's the fault of the tests or the port). Everything does pass now in
non-PIC, excluding btc/bts tests (in my current environment).
ck_pring
Devon H. O'Dell 14 years ago
parent 8818191ec0
commit 000eb80099

@ -164,16 +164,26 @@ CK_PR_LOAD_S(8, uint8_t, "movb")
CK_CC_INLINE static void
ck_pr_load_32_2(uint32_t target[2], uint32_t v[2])
{
#ifdef __PIC__
__asm__ __volatile__("pushl %%ebx;"
"movl %%edx, %%ecx;"
"movl %%eax, %%ebx;"
CK_PR_LOCK_PREFIX "cmpxchg8b %0;"
CK_PR_LOCK_PREFIX "cmpxchg8b %a2;"
"popl %%ebx;"
: "=a" (v[0]),
"=d" (v[1])
: "p" (&target[0])
: "%ecx", "memory", "cc");
#else
__asm__ __volatile__("movl %%edx, %%ecx;"
"movl %%eax, %%ebx;"
CK_PR_LOCK_PREFIX "cmpxchg8b %0;"
: "+m" (*(uint32_t *)target),
"=a" (v[0]),
"=d" (v[1])
:
: "ecx", "memory", "cc");
: "%ebx", "%ecx", "memory", "cc");
#endif
return;
}
@ -424,42 +434,11 @@ CK_PR_CAS_O_S(8, uint8_t, "b", "al")
#undef CK_PR_CAS_O_S
#undef CK_PR_CAS_O
/*
* Contrary to C-interface, alignment requirements are that of uint32_t[2].
*/
/*
 * Atomic double-word (64-bit) compare-and-swap over a uint32_t[2],
 * implemented with LOCK CMPXCHG8B. Returns true iff *target matched
 * compare[0..1] and was replaced by set[0..1] (ZF captured via setz).
 * NOTE(review): %ebx and %ecx are loaded manually and preserved with
 * push/pop — presumably so this single body also works when %ebx is
 * reserved as the PIC register; confirm against the build flags.
 */
CK_CC_INLINE static bool
ck_pr_cas_32_2(uint32_t target[2], uint32_t compare[2], uint32_t set[2])
{
bool z;
__asm__ __volatile__("pushl %%ebx;"
"pushl %%ecx;"
/* Load the new value: set[0] -> %ebx, set[1] -> %ecx. */
"movl 0(%2), %%ebx;"
"movl 4(%2), %%ecx;"
/* Load the expected value: compare[0] -> %eax, compare[1] -> %edx.
 * The leal/movl pair re-derives &compare from the "m" operand to
 * fetch its high word without a second memory operand. */
"movl %3, %%eax;"
"leal %3, %%edx;"
"movl 4(%%edx), %%edx;"
CK_PR_LOCK_PREFIX "cmpxchg8b %0; setz %1;"
"popl %%ecx;"
"popl %%ebx;"
: "+m" (*target),
"=m" (z)        /* setz writes straight to memory; avoids a byte-register constraint. */
: "q" (set),
"m" (compare)
: "memory", "cc", "%eax", "%edx");
return (bool)z;
}
/*
 * Double-pointer-width CAS: thin wrapper that reinterprets the three
 * void * arguments as uint32_t[2] and delegates to ck_pr_cas_32_2.
 * Alignment requirements are therefore those of uint32_t[2], not of
 * the pointed-to C type (see the note above ck_pr_cas_32_2).
 */
CK_CC_INLINE static bool
ck_pr_cas_ptr_2(void *t, void *c, void *s)
{
return ck_pr_cas_32_2(t, c, s);
}
CK_CC_INLINE static bool
ck_pr_cas_64(uint64_t *t, uint64_t c, uint64_t s)
{
bool z;
union {
uint64_t s;
uint32_t v[2];
@ -470,22 +449,31 @@ ck_pr_cas_64(uint64_t *t, uint64_t c, uint64_t s)
uint32_t v[2];
} comp;
set.s = s;
comp.c = c;
ck_pr_store_64(&set.s, s);
ck_pr_store_64(&comp.c, c);
#ifdef __PIC__
__asm__ __volatile__("pushl %%ebx;"
"pushl %%ecx;"
"movl 0(%4), %%ebx;"
"movl 4(%4), %%ecx;"
"movl %5, %%ebx;"
CK_PR_LOCK_PREFIX "cmpxchg8b %0; setz %1;"
"popl %%ecx;"
"popl %%ebx;"
: "+m" (*t),
"=adc" (z)
: "a" (comp.v[0]),
"d" (comp.v[1]),
"c" (set.v[1]),
"m" (set.v[0])
: "memory", "cc");
#else
__asm__ __volatile__(CK_PR_LOCK_PREFIX "cmpxchg8b %0; setz %1;"
: "+m" (*t),
"=q" (z)
: "a" (comp.v[0]),
"d" (comp.v[1]),
"q" (set.v)
"b" (set.v[0]),
"c" (set.v[1])
: "memory", "cc");
#endif
return (bool)z;
}
@ -506,6 +494,7 @@ ck_pr_cas_64_value(uint64_t *t, uint64_t c, uint64_t s, uint64_t *v)
set.s = s;
comp.c = c;
#ifdef __PIC__
/*
* Note the setz being done in memory. This is because if we allow
* gcc to pick a register, it seems to want to pick BL, which is
@ -514,43 +503,103 @@ ck_pr_cas_64_value(uint64_t *t, uint64_t c, uint64_t s, uint64_t *v)
* this. This also affects ck_pr_cas_32_2_value.
*/
__asm__ __volatile__("pushl %%ebx;"
"pushl %%ecx;"
"movl 0(%6), %%ebx;"
"movl 4(%6), %%ecx;"
CK_PR_LOCK_PREFIX "cmpxchg8b %0; setz %3;"
"popl %%ecx;"
"movl %7, %%ebx;"
CK_PR_LOCK_PREFIX "cmpxchg8b %a3; setz %2;"
"popl %%ebx;"
: "=a" (val[0]),
"=d" (val[1]),
"=q" (z)
: "p" (t),
"a" (comp.v[0]),
"d" (comp.v[1]),
"c" (set.v[1]),
"m" (set.v[0])
: "memory", "cc");
#else
__asm__ __volatile__(CK_PR_LOCK_PREFIX "cmpxchg8b %0; setz %3;"
: "+m" (*t),
"=a" (val[0]),
"=d" (val[1]),
"=m" (z)
"=q" (z)
: "a" (comp.v[0]),
"d" (comp.v[1]),
"q" (set.v)
"b" (set.v[0]),
"c" (set.v[1])
: "memory", "cc");
#endif
return (bool)z;
}
/*
 * Atomic double-word (64-bit) compare-and-swap over a uint32_t[2] using
 * LOCK CMPXCHG8B. Returns true iff t[0..1] matched c[0..1] and was
 * replaced by s[0..1] (ZF captured via setz).
 *
 * Two variants: under __PIC__, %ebx is the GOT pointer and may not be
 * named in constraints, so it is saved/restored by hand and s[0] is
 * loaded into it from a plain "m" operand; the target is addressed
 * through a "p" pointer constraint with the %aN address template.
 * The non-PIC variant lets the "b"/"c" constraints place s[0]/s[1]
 * directly and uses an ordinary "+m" memory operand for the target.
 */
CK_CC_INLINE static bool
ck_pr_cas_32_2(uint32_t t[2], uint32_t c[2], uint32_t s[2])
{
bool z;
#ifdef __PIC__
__asm__ __volatile__("pushl %%ebx;"
/* s[0] -> %ebx; %5 is the "m" (s[0]) operand below. */
"movl %5, %%ebx;"
/* %a1 emits &t[0] (operand 1, the "p" constraint) as an address. */
CK_PR_LOCK_PREFIX "cmpxchg8b %a1; setz %0;"
"popl %%ebx;"
: "=q" (z)
: "p" (&t[0]),
"a" (c[0]),
"d" (c[1]),
"c" (s[1]),
"m" (s[0])
/* NOTE(review): the store to *t is covered only by the "memory"
 * clobber here, not by a "+m" output — confirm this is intended. */
: "memory", "cc");
#else
__asm__ __volatile__(CK_PR_LOCK_PREFIX "cmpxchg8b %0; setz %1;"
: "+m" (*t),
"=q" (z)
: "a" (c[0]),
"d" (c[1]),
"b" (s[0]),
"c" (s[1])
: "memory", "cc");
#endif
return (bool)z;
}
/*
 * Double-pointer-width CAS: reinterprets the three void * arguments as
 * uint32_t[2] and delegates to ck_pr_cas_32_2. Callers must provide
 * storage aligned at least as strictly as uint32_t[2].
 */
CK_CC_INLINE static bool
ck_pr_cas_ptr_2(void *t, void *c, void *s)
{
return ck_pr_cas_32_2(t, c, s);
}
CK_CC_INLINE static bool
ck_pr_cas_32_2_value(uint32_t target[2], uint32_t compare[2], uint32_t set[2], uint32_t v[2])
{
bool z;
#ifdef __PIC__
__asm__ __volatile__("pushl %%ebx;"
"pushl %%ecx;"
"movl 0(%4), %%ebx;"
"movl 4(%4), %%ecx;"
CK_PR_LOCK_PREFIX "cmpxchg8b %0; setz %3;"
"popl %%ecx;"
"movl %7, %%ebx;"
CK_PR_LOCK_PREFIX "cmpxchg8b %a4; setz %2;"
"popl %%ebx;"
: "=a" (v[0]),
"=d" (v[1]),
"=q" (z)
: "p" (target),
"a" (compare[0]),
"d" (compare[1]),
"c" (set[1]),
"m" (set[0])
: "memory", "cc");
#else
__asm__ __volatile__(CK_PR_LOCK_PREFIX "cmpxchg8b %0; setz %3;"
: "+m" (*target),
"=a" (v[0]),
"=d" (v[1]),
"=m" (z)
"=q" (z)
: "a" (compare[0]),
"d" (compare[1]),
"q" (set)
"b" (set[0]),
"c" (set[1])
: "memory", "cc");
#endif
return (bool)z;
}
@ -610,6 +659,8 @@ CK_PR_CAS_V(8, 8, uint8_t)
CK_PR_BT_S(K, 32, uint32_t, #K "l %2, %0") \
CK_PR_BT_S(K, 16, uint16_t, #K "w %w2, %0")
/* TODO: GCC's intrinsic atomics for btc and bts don't work for 64-bit. */
CK_PR_GENERATE(btc)
CK_PR_GENERATE(bts)
CK_PR_GENERATE(btr)

Loading…
Cancel
Save