diff --git a/doc/ck_array_commit b/doc/ck_array_commit index cc56e29..0fc1192 100644 --- a/doc/ck_array_commit +++ b/doc/ck_array_commit @@ -39,7 +39,7 @@ Concurrency Kit (libck, \-lck) The .Fn ck_array_commit 3 function will commit any pending put or remove operations associated -with the array. The function may end up requested the safe reclamation +with the array. The function may end up requesting the safe reclamation of memory actively being iterated upon by other threads. .Sh RETURN VALUES This function returns true if the commit operation succeeded. It will diff --git a/include/ck_ring.h b/include/ck_ring.h index 3fcd123..88bb837 100644 --- a/include/ck_ring.h +++ b/include/ck_ring.h @@ -308,13 +308,6 @@ _ck_ring_dequeue_spmc(struct ck_ring *ring, ck_pr_fence_load(); - /* - * Both LLVM and GCC have generated code which completely - * ignores the semantics of the r load, despite it being - * sandwiched between compiler barriers. We use an atomic - * volatile load to force volatile semantics while allowing - * for r itself to remain aliased across the loop. 
- */ target = (char *)buffer + size * (consumer & mask); memcpy(data, target, size); @@ -409,7 +402,7 @@ ck_ring_trydequeue_spmc_##name(struct ck_ring *a, \ struct type *c) \ { \ \ - return _ck_ring_trydequeue_spmc(ring, \ + return _ck_ring_trydequeue_spmc(a, \ b, c, sizeof(struct type)); \ } \ \ diff --git a/include/gcc/ck_cc.h b/include/gcc/ck_cc.h index c2463c6..7a98c94 100644 --- a/include/gcc/ck_cc.h +++ b/include/gcc/ck_cc.h @@ -33,10 +33,19 @@ #define CK_CC_UNUSED #define CK_CC_USED #define CK_CC_IMM +#define CK_CC_IMM_U32 +#define CK_CC_IMM_S32 #else #define CK_CC_UNUSED __attribute__((unused)) #define CK_CC_USED __attribute__((used)) #define CK_CC_IMM "i" +#if defined(__x86_64__) || defined(__x86__) +#define CK_CC_IMM_U32 "Z" +#define CK_CC_IMM_S32 "e" +#else +#define CK_CC_IMM_U32 CK_CC_IMM +#define CK_CC_IMM_S32 CK_CC_IMM +#endif /* __x86_64__ || __x86__ */ #endif /* diff --git a/include/gcc/x86_64/ck_pr.h b/include/gcc/x86_64/ck_pr.h index 0e0020e..4d57e80 100644 --- a/include/gcc/x86_64/ck_pr.h +++ b/include/gcc/x86_64/ck_pr.h @@ -194,15 +194,15 @@ CK_PR_LOAD_2(8, 16, uint8_t) /* * Atomic store-to-memory operations. 
*/ -#define CK_PR_STORE_IMM(S, M, T, C, I) \ - CK_CC_INLINE static void \ - ck_pr_store_##S(M *target, T v) \ - { \ - __asm__ __volatile__(I " %1, %0" \ - : "=m" (*(C *)target) \ - : CK_CC_IMM "q" (v) \ - : "memory"); \ - return; \ +#define CK_PR_STORE_IMM(S, M, T, C, I, K) \ + CK_CC_INLINE static void \ + ck_pr_store_##S(M *target, T v) \ + { \ + __asm__ __volatile__(I " %1, %0" \ + : "=m" (*(C *)target) \ + : K "q" (v) \ + : "memory"); \ + return; \ } #define CK_PR_STORE(S, M, T, C, I) \ @@ -216,18 +216,18 @@ CK_PR_LOAD_2(8, 16, uint8_t) return; \ } -CK_PR_STORE_IMM(ptr, void, const void *, char, "movq") +CK_PR_STORE_IMM(ptr, void, const void *, char, "movq", CK_CC_IMM_U32) CK_PR_STORE(double, double, double, double, "movq") -#define CK_PR_STORE_S(S, T, I) CK_PR_STORE_IMM(S, T, T, T, I) +#define CK_PR_STORE_S(S, T, I, K) CK_PR_STORE_IMM(S, T, T, T, I, K) -CK_PR_STORE_S(char, char, "movb") -CK_PR_STORE_S(uint, unsigned int, "movl") -CK_PR_STORE_S(int, int, "movl") -CK_PR_STORE_S(64, uint64_t, "movq") -CK_PR_STORE_S(32, uint32_t, "movl") -CK_PR_STORE_S(16, uint16_t, "movw") -CK_PR_STORE_S(8, uint8_t, "movb") +CK_PR_STORE_S(char, char, "movb", CK_CC_IMM_S32) +CK_PR_STORE_S(int, int, "movl", CK_CC_IMM_S32) +CK_PR_STORE_S(uint, unsigned int, "movl", CK_CC_IMM_U32) +CK_PR_STORE_S(64, uint64_t, "movq", CK_CC_IMM_U32) +CK_PR_STORE_S(32, uint32_t, "movl", CK_CC_IMM_U32) +CK_PR_STORE_S(16, uint16_t, "movw", CK_CC_IMM_U32) +CK_PR_STORE_S(8, uint8_t, "movb", CK_CC_IMM_U32) #undef CK_PR_STORE_S #undef CK_PR_STORE_IMM @@ -324,28 +324,28 @@ CK_PR_GENERATE(not) /* * Atomic store-only binary operations. 
*/ -#define CK_PR_BINARY(K, S, M, T, C, I) \ +#define CK_PR_BINARY(K, S, M, T, C, I, O) \ CK_CC_INLINE static void \ ck_pr_##K##_##S(M *target, T d) \ { \ __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %1, %0" \ : "+m" (*(C *)target) \ - : CK_CC_IMM "q" (d) \ + : O "q" (d) \ : "memory", "cc"); \ return; \ } -#define CK_PR_BINARY_S(K, S, T, I) CK_PR_BINARY(K, S, T, T, T, I) - -#define CK_PR_GENERATE(K) \ - CK_PR_BINARY(K, ptr, void, uintptr_t, char, #K "q") \ - CK_PR_BINARY_S(K, char, char, #K "b") \ - CK_PR_BINARY_S(K, int, int, #K "l") \ - CK_PR_BINARY_S(K, uint, unsigned int, #K "l") \ - CK_PR_BINARY_S(K, 64, uint64_t, #K "q") \ - CK_PR_BINARY_S(K, 32, uint32_t, #K "l") \ - CK_PR_BINARY_S(K, 16, uint16_t, #K "w") \ - CK_PR_BINARY_S(K, 8, uint8_t, #K "b") +#define CK_PR_BINARY_S(K, S, T, I, O) CK_PR_BINARY(K, S, T, T, T, I, O) + +#define CK_PR_GENERATE(K) \ + CK_PR_BINARY(K, ptr, void, uintptr_t, char, #K "q", CK_CC_IMM_U32) \ + CK_PR_BINARY_S(K, char, char, #K "b", CK_CC_IMM_S32) \ + CK_PR_BINARY_S(K, int, int, #K "l", CK_CC_IMM_S32) \ + CK_PR_BINARY_S(K, uint, unsigned int, #K "l", CK_CC_IMM_U32) \ + CK_PR_BINARY_S(K, 64, uint64_t, #K "q", CK_CC_IMM_U32) \ + CK_PR_BINARY_S(K, 32, uint32_t, #K "l", CK_CC_IMM_U32) \ + CK_PR_BINARY_S(K, 16, uint16_t, #K "w", CK_CC_IMM_U32) \ + CK_PR_BINARY_S(K, 8, uint8_t, #K "b", CK_CC_IMM_U32) CK_PR_GENERATE(add) CK_PR_GENERATE(sub) diff --git a/regressions/ck_pr/validate/ck_pr_or.c b/regressions/ck_pr/validate/ck_pr_or.c index 8c797a2..27580c3 100644 --- a/regressions/ck_pr/validate/ck_pr_or.c +++ b/regressions/ck_pr/validate/ck_pr_or.c @@ -42,7 +42,9 @@ #define CK_PR_OR_T(w, v, d) \ { \ - uint##w##_t t = v; \ + uint##w##_t t; \ + ck_pr_or_##w(&t, 1ULL << (w - 1)); \ + t = v; \ ck_pr_or_##w(&t, d); \ if (t != (uint##w##_t)(v | d)) { \ printf("FAIL ["); \ diff --git a/regressions/ck_pr/validate/ck_pr_store.c b/regressions/ck_pr/validate/ck_pr_store.c index 23b6d47..e4b852b 100644 --- a/regressions/ck_pr/validate/ck_pr_store.c +++ 
b/regressions/ck_pr/validate/ck_pr_store.c @@ -40,7 +40,8 @@ #define CK_PR_STORE_B(w) \ { \ - uint##w##_t t = (uint##w##_t)-1, a = 0; \ + uint##w##_t t = (uint##w##_t)-1, a = 0, b; \ + ck_pr_store_##w(&b, 1ULL << (w - 1)); \ unsigned int i; \ printf("ck_pr_store_" #w ": "); \ if (w < 10) \