ck_rwlock: Add basic RTM interface to rwlock.

It is possible this will be moved to a self-contained file.
For a majority of architectures, RTM is an unnecessary
implementation-specific optimization.
ck_pring
Samy Al Bahra 12 years ago
parent 39ed9c6b8a
commit 4d2ccfe497

@ -58,6 +58,21 @@ ck_rwlock_write_unlock(ck_rwlock_t *rw)
return; return;
} }
#ifdef CK_F_PR_RTM
/*
 * Write-side release for the RTM interface.
 *
 * When rw->writer reads as non-zero the release is forwarded to
 * ck_rwlock_write_unlock(); when it reads as zero the function calls
 * ck_pr_rtm_end() instead and leaves the lock words untouched.
 *
 * NOTE(review): a zero writer field presumably identifies an acquisition
 * made inside a transaction (ck_rwlock_write_lock_rtm performs no store to
 * the lock on its transactional path) -- confirm against the regular
 * write-lock implementation.
 */
CK_CC_INLINE static void
ck_rwlock_write_unlock_rtm(ck_rwlock_t *rw)
{

	if (ck_pr_load_uint(&rw->writer) != 0) {
		/* Writer flag is set: release through the regular path. */
		ck_rwlock_write_unlock(rw);
		return;
	}

	/* Writer flag is clear: finish the transaction. */
	ck_pr_rtm_end();
	return;
}
#endif /* CK_F_PR_RTM */
CK_CC_INLINE static void CK_CC_INLINE static void
ck_rwlock_write_downgrade(ck_rwlock_t *rw) ck_rwlock_write_downgrade(ck_rwlock_t *rw)
{ {
@ -67,6 +82,25 @@ ck_rwlock_write_downgrade(ck_rwlock_t *rw)
return; return;
} }
#ifdef CK_F_PR_RTM
/*
 * Write-to-read downgrade for the RTM interface.
 *
 * If rw->writer reads as non-zero, the downgrade is delegated to
 * ck_rwlock_write_downgrade(). Otherwise nothing is written and the
 * function simply returns.
 */
CK_CC_INLINE static void
ck_rwlock_write_downgrade_rtm(ck_rwlock_t *rw)
{

	if (ck_pr_load_uint(&rw->writer) == 0) {
		/*
		 * The reader and writer counters are both part of the
		 * transaction's read-set, so the arrival of another writer
		 * triggers a transactional abort. The commit itself is left
		 * to the inner-most read_unlock call.
		 */
		return;
	}

	ck_rwlock_write_downgrade(rw);
	return;
}
#endif /* CK_F_PR_RTM */
CK_CC_INLINE static bool CK_CC_INLINE static bool
ck_rwlock_write_trylock(ck_rwlock_t *rw) ck_rwlock_write_trylock(ck_rwlock_t *rw)
{ {
@ -84,6 +118,27 @@ ck_rwlock_write_trylock(ck_rwlock_t *rw)
return true; return true;
} }
#ifdef CK_F_PR_RTM
/*
 * Write-side try-lock for the RTM interface.
 *
 * Attempts to start a transaction with ck_pr_rtm_begin(). If the
 * transaction does not start, the result of ck_rwlock_write_trylock() is
 * returned. Inside a started transaction the writer flag and the reader
 * count are loaded (separated by a load fence); if either is non-zero the
 * transaction is aborted with ck_pr_rtm_abort(0), otherwise true is
 * returned without storing to the lock.
 *
 * NOTE(review): this relies on an abort transferring control back to
 * ck_pr_rtm_begin() with a status other than CK_PR_RTM_STARTED so that the
 * fallback path runs -- confirm against the ck_pr RTM documentation.
 */
CK_CC_INLINE static bool
ck_rwlock_write_trylock_rtm(ck_rwlock_t *rw)
{
	bool writer_present, readers_present;

	if (ck_pr_rtm_begin() != CK_PR_RTM_STARTED)
		return ck_rwlock_write_trylock(rw);

	writer_present = ck_pr_load_uint(&rw->writer) != 0;
	ck_pr_fence_load();
	readers_present = ck_pr_load_uint(&rw->n_readers) != 0;

	/* Bitwise OR on purpose: both fields have already been loaded. */
	if (writer_present | readers_present)
		ck_pr_rtm_abort(0);

	return true;
}
#endif /* CK_F_PR_RTM */
CK_CC_INLINE static void CK_CC_INLINE static void
ck_rwlock_write_lock(ck_rwlock_t *rw) ck_rwlock_write_lock(ck_rwlock_t *rw)
{ {
@ -99,6 +154,28 @@ ck_rwlock_write_lock(ck_rwlock_t *rw)
return; return;
} }
#ifdef CK_F_PR_RTM
/*
 * Write-side lock for the RTM interface.
 *
 * Attempts to start a transaction with ck_pr_rtm_begin(). If the
 * transaction does not start, the lock is taken with
 * ck_rwlock_write_lock(). Inside a started transaction the writer flag and
 * the reader count are loaded (separated by a load fence); if either is
 * non-zero the transaction is aborted with ck_pr_rtm_abort(0), otherwise
 * the function returns without storing to the lock.
 *
 * NOTE(review): this relies on an abort transferring control back to
 * ck_pr_rtm_begin() with a status other than CK_PR_RTM_STARTED so that the
 * fallback path runs -- confirm against the ck_pr RTM documentation.
 */
CK_CC_INLINE static void
ck_rwlock_write_lock_rtm(ck_rwlock_t *rw)
{
	bool writer_present, readers_present;

	if (ck_pr_rtm_begin() != CK_PR_RTM_STARTED) {
		/* No transaction: acquire the lock the regular way. */
		ck_rwlock_write_lock(rw);
		return;
	}

	writer_present = ck_pr_load_uint(&rw->writer) != 0;
	ck_pr_fence_load();
	readers_present = ck_pr_load_uint(&rw->n_readers) != 0;

	/* Bitwise OR on purpose: both fields have already been loaded. */
	if (writer_present | readers_present)
		ck_pr_rtm_abort(0);

	return;
}
#endif /* CK_F_PR_RTM */
CK_CC_INLINE static bool CK_CC_INLINE static bool
ck_rwlock_read_trylock(ck_rwlock_t *rw) ck_rwlock_read_trylock(ck_rwlock_t *rw)
{ {
@ -141,6 +218,7 @@ ck_rwlock_read_lock(ck_rwlock_t *rw)
if (ck_pr_load_uint(&rw->writer) == 0) if (ck_pr_load_uint(&rw->writer) == 0)
break; break;
ck_pr_dec_uint(&rw->n_readers); ck_pr_dec_uint(&rw->n_readers);
} }
@ -149,6 +227,23 @@ ck_rwlock_read_lock(ck_rwlock_t *rw)
return; return;
} }
#ifdef CK_F_PR_RTM
/*
 * Read-side lock for the RTM interface.
 *
 * Attempts to start a transaction with ck_pr_rtm_begin(). If the
 * transaction does not start, the lock is taken with
 * ck_rwlock_read_lock(). Inside a started transaction only the writer flag
 * is inspected: a non-zero value aborts the transaction with
 * ck_pr_rtm_abort(0); otherwise the function returns without touching the
 * reader count.
 */
CK_CC_INLINE static void
ck_rwlock_read_lock_rtm(ck_rwlock_t *rw)
{

	if (ck_pr_rtm_begin() != CK_PR_RTM_STARTED) {
		/* No transaction: acquire the lock the regular way. */
		ck_rwlock_read_lock(rw);
		return;
	}

	if (ck_pr_load_uint(&rw->writer) != 0)
		ck_pr_rtm_abort(0);

	return;
}
#endif /* CK_F_PR_RTM */
CK_CC_INLINE static void CK_CC_INLINE static void
ck_rwlock_read_unlock(ck_rwlock_t *rw) ck_rwlock_read_unlock(ck_rwlock_t *rw)
{ {
@ -158,6 +253,21 @@ ck_rwlock_read_unlock(ck_rwlock_t *rw)
return; return;
} }
#ifdef CK_F_PR_RTM
/*
 * Read-side release for the RTM interface.
 *
 * When rw->n_readers reads as non-zero the release is forwarded to
 * ck_rwlock_read_unlock(); when it reads as zero the function calls
 * ck_pr_rtm_end() instead.
 *
 * NOTE(review): a zero reader count presumably identifies an acquisition
 * made inside a transaction (ck_rwlock_read_lock_rtm does not modify the
 * reader count on its transactional path) -- confirm against the regular
 * read-lock implementation.
 */
CK_CC_INLINE static void
ck_rwlock_read_unlock_rtm(ck_rwlock_t *rw)
{

	if (ck_pr_load_uint(&rw->n_readers) != 0) {
		/* Reader count is non-zero: release through the regular path. */
		ck_rwlock_read_unlock(rw);
		return;
	}

	/* Reader count is zero: finish the transaction. */
	ck_pr_rtm_end();
	return;
}
#endif /* CK_F_PR_RTM */
/* /*
* Recursive writer reader-writer lock implementation. * Recursive writer reader-writer lock implementation.
*/ */
@ -251,3 +361,4 @@ ck_rwlock_recursive_read_unlock(ck_rwlock_recursive_t *rw)
} }
#endif /* _CK_RWLOCK_H */ #endif /* _CK_RWLOCK_H */

@ -51,7 +51,22 @@ main(void)
ck_rwlock_write_unlock(&rwlock); ck_rwlock_write_unlock(&rwlock);
} }
e_b = rdtsc(); e_b = rdtsc();
printf("WRITE: rwlock %15" PRIu64 "\n", (e_b - s_b) / STEPS); printf(" WRITE: rwlock %15" PRIu64 "\n", (e_b - s_b) / STEPS);
#ifdef CK_F_PR_RTM
for (i = 0; i < STEPS; i++) {
ck_rwlock_write_lock_rtm(&rwlock);
ck_rwlock_write_unlock_rtm(&rwlock);
}
s_b = rdtsc();
for (i = 0; i < STEPS; i++) {
ck_rwlock_write_lock_rtm(&rwlock);
ck_rwlock_write_unlock_rtm(&rwlock);
}
e_b = rdtsc();
printf(" (rtm) WRITE: rwlock %15" PRIu64 "\n", (e_b - s_b) / STEPS);
#endif /* CK_F_PR_RTM */
for (i = 0; i < STEPS; i++) { for (i = 0; i < STEPS; i++) {
ck_rwlock_read_lock(&rwlock); ck_rwlock_read_lock(&rwlock);
@ -64,8 +79,23 @@ main(void)
ck_rwlock_read_unlock(&rwlock); ck_rwlock_read_unlock(&rwlock);
} }
e_b = rdtsc(); e_b = rdtsc();
printf("READ: rwlock %15" PRIu64 "\n", (e_b - s_b) / STEPS); printf(" READ: rwlock %15" PRIu64 "\n", (e_b - s_b) / STEPS);
#ifdef CK_F_PR_RTM
for (i = 0; i < STEPS; i++) {
ck_rwlock_read_lock_rtm(&rwlock);
ck_rwlock_read_unlock_rtm(&rwlock);
}
s_b = rdtsc();
for (i = 0; i < STEPS; i++) {
ck_rwlock_read_lock_rtm(&rwlock);
ck_rwlock_read_unlock_rtm(&rwlock);
}
e_b = rdtsc();
printf(" (rtm) READ: rwlock %15" PRIu64 "\n", (e_b - s_b) / STEPS);
#endif /* CK_F_PR_RTM */
return (0); return 0;
} }

@ -41,11 +41,17 @@
static int barrier; static int barrier;
static int threads; static int threads;
static unsigned int flag CK_CC_CACHELINE; static unsigned int flag CK_CC_CACHELINE;
static ck_rwlock_t rwlock = CK_RWLOCK_INITIALIZER; static struct {
ck_rwlock_t lock;
} rw CK_CC_CACHELINE = {
.lock = CK_RWLOCK_INITIALIZER
};
static struct affinity affinity; static struct affinity affinity;
#ifdef CK_F_PR_RTM
static void * static void *
thread_rwlock(void *pun) thread_lock_rtm(void *pun)
{ {
uint64_t s_b, e_b, a, i; uint64_t s_b, e_b, a, i;
uint64_t *value = pun; uint64_t *value = pun;
@ -61,38 +67,38 @@ thread_rwlock(void *pun)
for (i = 1, a = 0;; i++) { for (i = 1, a = 0;; i++) {
s_b = rdtsc(); s_b = rdtsc();
ck_rwlock_read_lock(&rwlock); ck_rwlock_read_lock_rtm(&rw.lock);
ck_rwlock_read_unlock(&rwlock); ck_rwlock_read_unlock_rtm(&rw.lock);
ck_rwlock_read_lock(&rwlock); ck_rwlock_read_lock_rtm(&rw.lock);
ck_rwlock_read_unlock(&rwlock); ck_rwlock_read_unlock_rtm(&rw.lock);
ck_rwlock_read_lock(&rwlock); ck_rwlock_read_lock_rtm(&rw.lock);
ck_rwlock_read_unlock(&rwlock); ck_rwlock_read_unlock_rtm(&rw.lock);
ck_rwlock_read_lock(&rwlock); ck_rwlock_read_lock_rtm(&rw.lock);
ck_rwlock_read_unlock(&rwlock); ck_rwlock_read_unlock_rtm(&rw.lock);
ck_rwlock_read_lock(&rwlock); ck_rwlock_read_lock_rtm(&rw.lock);
ck_rwlock_read_unlock(&rwlock); ck_rwlock_read_unlock_rtm(&rw.lock);
ck_rwlock_read_lock(&rwlock); ck_rwlock_read_lock_rtm(&rw.lock);
ck_rwlock_read_unlock(&rwlock); ck_rwlock_read_unlock_rtm(&rw.lock);
ck_rwlock_read_lock(&rwlock); ck_rwlock_read_lock_rtm(&rw.lock);
ck_rwlock_read_unlock(&rwlock); ck_rwlock_read_unlock_rtm(&rw.lock);
ck_rwlock_read_lock(&rwlock); ck_rwlock_read_lock_rtm(&rw.lock);
ck_rwlock_read_unlock(&rwlock); ck_rwlock_read_unlock_rtm(&rw.lock);
ck_rwlock_read_lock(&rwlock); ck_rwlock_read_lock_rtm(&rw.lock);
ck_rwlock_read_unlock(&rwlock); ck_rwlock_read_unlock_rtm(&rw.lock);
ck_rwlock_read_lock(&rwlock); ck_rwlock_read_lock_rtm(&rw.lock);
ck_rwlock_read_unlock(&rwlock); ck_rwlock_read_unlock_rtm(&rw.lock);
ck_rwlock_read_lock(&rwlock); ck_rwlock_read_lock_rtm(&rw.lock);
ck_rwlock_read_unlock(&rwlock); ck_rwlock_read_unlock_rtm(&rw.lock);
ck_rwlock_read_lock(&rwlock); ck_rwlock_read_lock_rtm(&rw.lock);
ck_rwlock_read_unlock(&rwlock); ck_rwlock_read_unlock_rtm(&rw.lock);
ck_rwlock_read_lock(&rwlock); ck_rwlock_read_lock_rtm(&rw.lock);
ck_rwlock_read_unlock(&rwlock); ck_rwlock_read_unlock_rtm(&rw.lock);
ck_rwlock_read_lock(&rwlock); ck_rwlock_read_lock_rtm(&rw.lock);
ck_rwlock_read_unlock(&rwlock); ck_rwlock_read_unlock_rtm(&rw.lock);
ck_rwlock_read_lock(&rwlock); ck_rwlock_read_lock_rtm(&rw.lock);
ck_rwlock_read_unlock(&rwlock); ck_rwlock_read_unlock_rtm(&rw.lock);
ck_rwlock_read_lock(&rwlock); ck_rwlock_read_lock_rtm(&rw.lock);
ck_rwlock_read_unlock(&rwlock); ck_rwlock_read_unlock_rtm(&rw.lock);
e_b = rdtsc(); e_b = rdtsc();
a += (e_b - s_b) >> 4; a += (e_b - s_b) >> 4;
@ -108,39 +114,87 @@ thread_rwlock(void *pun)
*value = (a / i); *value = (a / i);
return NULL; return NULL;
} }
#endif /* CK_F_PR_RTM */
int static void *
main(int argc, char *argv[]) thread_lock(void *pun)
{ {
int t; uint64_t s_b, e_b, a, i;
pthread_t *p; uint64_t *value = pun;
uint64_t *latency;
if (argc != 3) { if (aff_iterate(&affinity) != 0) {
ck_error("Usage: throughput <delta> <threads>\n"); perror("ERROR: Could not affine thread");
exit(EXIT_FAILURE);
} }
threads = atoi(argv[2]); ck_pr_inc_int(&barrier);
if (threads <= 0) { while (ck_pr_load_int(&barrier) != threads)
ck_error("ERROR: Threads must be a value > 0.\n"); ck_pr_stall();
}
p = malloc(sizeof(pthread_t) * threads); for (i = 1, a = 0;; i++) {
if (p == NULL) { s_b = rdtsc();
ck_error("ERROR: Failed to initialize thread.\n"); ck_rwlock_read_lock(&rw.lock);
} ck_rwlock_read_unlock(&rw.lock);
ck_rwlock_read_lock(&rw.lock);
ck_rwlock_read_unlock(&rw.lock);
ck_rwlock_read_lock(&rw.lock);
ck_rwlock_read_unlock(&rw.lock);
ck_rwlock_read_lock(&rw.lock);
ck_rwlock_read_unlock(&rw.lock);
ck_rwlock_read_lock(&rw.lock);
ck_rwlock_read_unlock(&rw.lock);
ck_rwlock_read_lock(&rw.lock);
ck_rwlock_read_unlock(&rw.lock);
ck_rwlock_read_lock(&rw.lock);
ck_rwlock_read_unlock(&rw.lock);
ck_rwlock_read_lock(&rw.lock);
ck_rwlock_read_unlock(&rw.lock);
ck_rwlock_read_lock(&rw.lock);
ck_rwlock_read_unlock(&rw.lock);
ck_rwlock_read_lock(&rw.lock);
ck_rwlock_read_unlock(&rw.lock);
ck_rwlock_read_lock(&rw.lock);
ck_rwlock_read_unlock(&rw.lock);
ck_rwlock_read_lock(&rw.lock);
ck_rwlock_read_unlock(&rw.lock);
ck_rwlock_read_lock(&rw.lock);
ck_rwlock_read_unlock(&rw.lock);
ck_rwlock_read_lock(&rw.lock);
ck_rwlock_read_unlock(&rw.lock);
ck_rwlock_read_lock(&rw.lock);
ck_rwlock_read_unlock(&rw.lock);
ck_rwlock_read_lock(&rw.lock);
ck_rwlock_read_unlock(&rw.lock);
e_b = rdtsc();
latency = malloc(sizeof(uint64_t) * threads); a += (e_b - s_b) >> 4;
if (latency == NULL) {
ck_error("ERROR: Failed to create latency buffer.\n"); if (ck_pr_load_uint(&flag) == 1)
break;
} }
affinity.delta = atoi(argv[1]); ck_pr_inc_int(&barrier);
while (ck_pr_load_int(&barrier) != threads * 2)
ck_pr_stall();
*value = (a / i);
return NULL;
}
static void
rwlock_test(pthread_t *p, int d, uint64_t *latency, void *(*f)(void *), const char *label)
{
int t;
ck_pr_store_int(&barrier, 0);
ck_pr_store_uint(&flag, 0);
affinity.delta = d;
affinity.request = 0; affinity.request = 0;
fprintf(stderr, "Creating threads (rwlock)..."); fprintf(stderr, "Creating threads (%s)...", label);
for (t = 0; t < threads; t++) { for (t = 0; t < threads; t++) {
if (pthread_create(&p[t], NULL, thread_rwlock, latency + t) != 0) { if (pthread_create(&p[t], NULL, f, latency + t) != 0) {
ck_error("ERROR: Could not create thread %d\n", t); ck_error("ERROR: Could not create thread %d\n", t);
} }
} }
@ -157,6 +211,44 @@ main(int argc, char *argv[])
for (t = 1; t <= threads; t++) for (t = 1; t <= threads; t++)
printf("%10u %20" PRIu64 "\n", t, latency[t - 1]); printf("%10u %20" PRIu64 "\n", t, latency[t - 1]);
return (0); fprintf(stderr, "\n");
return;
}
/*
 * Benchmark entry point.
 *
 * Expects exactly two arguments, <delta> and <threads>. It allocates one
 * pthread_t and one uint64_t latency slot per thread, then runs
 * rwlock_test() with thread_lock and, when CK_F_PR_RTM is defined, again
 * with thread_lock_rtm. Returns 0.
 *
 * NOTE(review): the code keeps using its inputs after each ck_error() call,
 * so ck_error() presumably does not return -- confirm.
 */
int
main(int argc, char *argv[])
{
int d;
pthread_t *p;
uint64_t *latency;
/* argv[1] is the affinity delta, argv[2] is the thread count. */
if (argc != 3) {
ck_error("Usage: throughput <delta> <threads>\n");
}
threads = atoi(argv[2]);
if (threads <= 0) {
ck_error("ERROR: Threads must be a value > 0.\n");
}
/* One thread handle per worker. */
p = malloc(sizeof(pthread_t) * threads);
if (p == NULL) {
ck_error("ERROR: Failed to initialize thread.\n");
}
/* One latency result slot per worker. */
latency = malloc(sizeof(uint64_t) * threads);
if (latency == NULL) {
ck_error("ERROR: Failed to create latency buffer.\n");
}
d = atoi(argv[1]);
/* Regular lock first; the RTM variant only when the platform provides it. */
rwlock_test(p, d, latency, thread_lock, "rwlock");
#ifdef CK_F_PR_RTM
rwlock_test(p, d, latency, thread_lock_rtm, "rwlock, rtm");
#endif /* CK_F_PR_RTM */
return 0;
} }

@ -123,10 +123,156 @@ thread_recursive(void *null CK_CC_UNUSED)
return (NULL); return (NULL);
} }
#ifdef CK_F_PR_RTM
/*
 * Validation worker that mixes the RTM and regular rwlock entry points.
 *
 * After binding itself with aff_iterate(), the thread runs ITERATE rounds.
 * On rounds where the counter is odd it uses the *_rtm lock/unlock calls,
 * on even rounds the regular ones. Each round takes the write lock, checks
 * that the shared `locked` counter is 0, raises it to 8 in eight steps,
 * checks it, lowers it back to 0 and checks again; it then takes the read
 * lock and checks that `locked` is 0. Any mismatch is reported through
 * ck_error().
 *
 * The argument is unused; the function always returns NULL.
 */
static void *
thread_rtm_mix(void *null CK_CC_UNUSED)
{
	unsigned int i = ITERATE;
	unsigned int l;

	if (aff_iterate(&a)) {
		perror("ERROR: Could not affine thread");
		exit(EXIT_FAILURE);
	}

	while (i--) {
		/* Odd rounds exercise the RTM interface, even rounds the regular one. */
		if (i & 1) {
			ck_rwlock_write_lock_rtm(&lock);
		} else {
			ck_rwlock_write_lock(&lock);
		}

		{
			l = ck_pr_load_uint(&locked);
			if (l != 0) {
				ck_error("ERROR [WR:%d]: %u != 0\n", __LINE__, l);
			}

			ck_pr_inc_uint(&locked);
			ck_pr_inc_uint(&locked);
			ck_pr_inc_uint(&locked);
			ck_pr_inc_uint(&locked);
			ck_pr_inc_uint(&locked);
			ck_pr_inc_uint(&locked);
			ck_pr_inc_uint(&locked);
			ck_pr_inc_uint(&locked);

			/* Eight increments were applied, so the expected value is 8. */
			l = ck_pr_load_uint(&locked);
			if (l != 8) {
				ck_error("ERROR [WR:%d]: %u != 8\n", __LINE__, l);
			}

			ck_pr_dec_uint(&locked);
			ck_pr_dec_uint(&locked);
			ck_pr_dec_uint(&locked);
			ck_pr_dec_uint(&locked);
			ck_pr_dec_uint(&locked);
			ck_pr_dec_uint(&locked);
			ck_pr_dec_uint(&locked);
			ck_pr_dec_uint(&locked);

			l = ck_pr_load_uint(&locked);
			if (l != 0) {
				ck_error("ERROR [WR:%d]: %u != 0\n", __LINE__, l);
			}
		}

		/* Release with the same interface that acquired the lock. */
		if (i & 1) {
			ck_rwlock_write_unlock_rtm(&lock);
		} else {
			ck_rwlock_write_unlock(&lock);
		}

		if (i & 1) {
			ck_rwlock_read_lock_rtm(&lock);
		} else {
			ck_rwlock_read_lock(&lock);
		}

		{
			/* Readers must never observe a writer's in-progress count. */
			l = ck_pr_load_uint(&locked);
			if (l != 0) {
				ck_error("ERROR [RD:%d]: %u != 0\n", __LINE__, l);
			}
		}

		if (i & 1) {
			ck_rwlock_read_unlock_rtm(&lock);
		} else {
			ck_rwlock_read_unlock(&lock);
		}
	}

	return (NULL);
}
/*
 * Validation worker that uses only the RTM rwlock entry points.
 *
 * After binding itself with aff_iterate(), the thread runs ITERATE rounds.
 * Each round takes the write lock through ck_rwlock_write_lock_rtm(),
 * checks that the shared `locked` counter is 0, raises it to 8 in eight
 * steps, checks it, lowers it back to 0 and checks again; it then takes
 * the read lock through ck_rwlock_read_lock_rtm() and checks that `locked`
 * is 0. Any mismatch is reported through ck_error().
 *
 * The argument is unused; the function always returns NULL.
 */
static void *
thread_rtm(void *null CK_CC_UNUSED)
{
	unsigned int i = ITERATE;
	unsigned int l;

	if (aff_iterate(&a)) {
		perror("ERROR: Could not affine thread");
		exit(EXIT_FAILURE);
	}

	while (i--) {
		ck_rwlock_write_lock_rtm(&lock);
		{
			l = ck_pr_load_uint(&locked);
			if (l != 0) {
				ck_error("ERROR [WR:%d]: %u != 0\n", __LINE__, l);
			}

			ck_pr_inc_uint(&locked);
			ck_pr_inc_uint(&locked);
			ck_pr_inc_uint(&locked);
			ck_pr_inc_uint(&locked);
			ck_pr_inc_uint(&locked);
			ck_pr_inc_uint(&locked);
			ck_pr_inc_uint(&locked);
			ck_pr_inc_uint(&locked);

			/* Eight increments were applied, so the expected value is 8. */
			l = ck_pr_load_uint(&locked);
			if (l != 8) {
				ck_error("ERROR [WR:%d]: %u != 8\n", __LINE__, l);
			}

			ck_pr_dec_uint(&locked);
			ck_pr_dec_uint(&locked);
			ck_pr_dec_uint(&locked);
			ck_pr_dec_uint(&locked);
			ck_pr_dec_uint(&locked);
			ck_pr_dec_uint(&locked);
			ck_pr_dec_uint(&locked);
			ck_pr_dec_uint(&locked);

			l = ck_pr_load_uint(&locked);
			if (l != 0) {
				ck_error("ERROR [WR:%d]: %u != 0\n", __LINE__, l);
			}
		}
		ck_rwlock_write_unlock_rtm(&lock);

		ck_rwlock_read_lock_rtm(&lock);
		{
			/* Readers must never observe a writer's in-progress count. */
			l = ck_pr_load_uint(&locked);
			if (l != 0) {
				ck_error("ERROR [RD:%d]: %u != 0\n", __LINE__, l);
			}
		}
		ck_rwlock_read_unlock_rtm(&lock);
	}

	return (NULL);
}
#endif /* CK_F_PR_RTM */
static void * static void *
thread(void *null CK_CC_UNUSED) thread(void *null CK_CC_UNUSED)
{ {
int i = ITERATE; unsigned int i = ITERATE;
unsigned int l; unsigned int l;
if (aff_iterate(&a)) { if (aff_iterate(&a)) {
@ -185,11 +331,29 @@ thread(void *null CK_CC_UNUSED)
return (NULL); return (NULL);
} }
static void
rwlock_test(pthread_t *threads, void *(*f)(void *), const char *test)
{
int i;
fprintf(stderr, "Creating threads (%s)...", test);
for (i = 0; i < nthr; i++) {
if (pthread_create(&threads[i], NULL, f, NULL)) {
ck_error("ERROR: Could not create thread %d\n", i);
}
}
fprintf(stderr, ".");
for (i = 0; i < nthr; i++)
pthread_join(threads[i], NULL);
fprintf(stderr, "done (passed)\n");
return;
}
int int
main(int argc, char *argv[]) main(int argc, char *argv[])
{ {
pthread_t *threads; pthread_t *threads;
int i;
if (argc != 3) { if (argc != 3) {
ck_error("Usage: validate <number of threads> <affinity delta>\n"); ck_error("Usage: validate <number of threads> <affinity delta>\n");
@ -207,32 +371,12 @@ main(int argc, char *argv[])
a.delta = atoi(argv[2]); a.delta = atoi(argv[2]);
fprintf(stderr, "Creating threads (mutual exclusion)..."); rwlock_test(threads, thread, "regular");
for (i = 0; i < nthr; i++) { #ifdef CK_F_PR_RTM
if (pthread_create(&threads[i], NULL, thread, NULL)) { rwlock_test(threads, thread_rtm, "rtm");
ck_error("ERROR: Could not create thread %d\n", i); rwlock_test(threads, thread_rtm_mix, "rtm-mix");
} #endif
} rwlock_test(threads, thread_recursive, "recursive");
fprintf(stderr, "done\n"); return 0;
fprintf(stderr, "Waiting for threads to finish correctness regression...");
for (i = 0; i < nthr; i++)
pthread_join(threads[i], NULL);
fprintf(stderr, "done (passed)\n");
fprintf(stderr, "Creating threads (mutual exclusion, recursive)...");
for (i = 0; i < nthr; i++) {
if (pthread_create(&threads[i], NULL, thread_recursive, NULL)) {
ck_error("ERROR: Could not create thread %d\n", i);
}
}
fprintf(stderr, "done\n");
fprintf(stderr, "Waiting for threads to finish correctness regression...");
for (i = 0; i < nthr; i++)
pthread_join(threads[i], NULL);
fprintf(stderr, "done (passed)\n");
return (0);
} }

Loading…
Cancel
Save