diff --git a/.gitignore b/.gitignore index 05a48ab..276ccc9 100644 --- a/.gitignore +++ b/.gitignore @@ -140,6 +140,7 @@ regressions/ck_brlock/benchmark/throughput regressions/ck_rwlock/benchmark/throughput regressions/ck_queue/validate/ck_list regressions/ck_queue/validate/ck_slist +regressions/ck_queue/validate/ck_stailq regressions/ck_cohort/validate/validate regressions/ck_cohort/benchmark/ck_cohort.LATENCY regressions/ck_cohort/benchmark/ck_cohort.THROUGHPUT diff --git a/configure b/configure index f4df79d..e439ace 100755 --- a/configure +++ b/configure @@ -579,11 +579,10 @@ mkdir -p $P_PWD/src if test "$P_PWD" '!=' "$BUILD_DIR"; then mkdir -p $P_PWD/regressions cp $BUILD_DIR/regressions/Makefile.unsupported $P_PWD/regressions/Makefile &> /dev/null + cp $BUILD_DIR/build/ck.build.$PROFILE $P_PWD/build/ck.build.$PROFILE &> /dev/null + cp $BUILD_DIR/include/ck_md.h $P_PWD/include/ck_md.h &> /dev/null fi -cp $BUILD_DIR/build/ck.build.$PROFILE $P_PWD/build/ck.build.$PROFILE &> /dev/null -cp $BUILD_DIR/include/ck_md.h $P_PWD/include/ck_md.h &> /dev/null - generate src/Makefile.in $P_PWD/src/Makefile generate doc/Makefile.in $P_PWD/doc/Makefile generate build/ck.build.in $P_PWD/build/ck.build diff --git a/doc/Makefile.in b/doc/Makefile.in index 397f883..df8d7e8 100644 --- a/doc/Makefile.in +++ b/doc/Makefile.in @@ -91,10 +91,17 @@ OBJECTS=ck_ht_count \ ck_pr \ ck_pr_barrier \ ck_pr_fas \ + ck_pr_fence_atomic \ + ck_pr_fence_atomic_load \ + ck_pr_fence_atomic_store \ ck_pr_fence_load \ + ck_pr_fence_load_atomic \ + ck_pr_fence_load_store \ ck_pr_fence_load_depends \ ck_pr_fence_memory \ ck_pr_fence_store \ + ck_pr_fence_store_atomic \ + ck_pr_fence_store_load \ ck_pr_stall \ ck_pr_faa \ ck_pr_inc \ diff --git a/doc/ck_pr_fence_atomic b/doc/ck_pr_fence_atomic new file mode 100644 index 0000000..452606b --- /dev/null +++ b/doc/ck_pr_fence_atomic @@ -0,0 +1,111 @@ +.\" +.\" Copyright 2013 Samy Al Bahra. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. 
+.\" +.\" +.Dd May 16, 2013 +.Dt CK_PR_FENCE_ATOMIC 3 +.Sh NAME +.Nm ck_pr_fence_atomic +.Nd enforce partial ordering of atomic read-modify-write operations +.Sh LIBRARY +Concurrency Kit (libck, \-lck) +.Sh SYNOPSIS +.In ck_pr.h +.Ft void +.Fn ck_pr_fence_atomic void +.Ft void +.Fn ck_pr_fence_strict_atomic void +.Sh DESCRIPTION +The +.Fn ck_pr_fence_atomic +function enfores the ordering of any +atomic read-modify-write operations relative to +the invocation of the function. This function +always serve as an implicit compiler barrier. On +architectures implementing CK_MD_TSO, this operation +only serves as a compiler barrier and no fences +are emitted. On architectures implementing +CK_MD_PSO and CK_MD_RMO, a store fence is +emitted. To force the unconditional emission of +a fence, use +.Fn ck_pr_fence_strict_atomic . +.Sh EXAMPLE +.Bd -literal -offset indent + +#include + +static int a = 0; +static int b = 0; +static int c = 0; + +void +function(void) +{ + + ck_pr_fas_int(&a, 1); + + /* + * Guarantee that the update to a is completed + * with respect to the updates of b and c. + */ + ck_pr_fence_atomic(); + ck_pr_fas_int(&b, 2); + ck_pr_fas_int(&c, 2); + + return; +} +.Ed +.Sh RETURN VALUES +This function has no return value. +.Sh SEE ALSO +.Xr ck_pr_stall 3 , +.Xr ck_pr_fence_atomic_store 3 , +.Xr ck_pr_fence_atomic_load 3 , +.Xr ck_pr_fence_store 3 , +.Xr ck_pr_fence_load 3 , +.Xr ck_pr_fence_load_atomic 3 , +.Xr ck_pr_fence_load_store 3 , +.Xr ck_pr_fence_load_depends 3 , +.Xr ck_pr_fence_memory 3 , +.Xr ck_pr_barrier 3 , +.Xr ck_pr_fas 3 , +.Xr ck_pr_load 3 , +.Xr ck_pr_store 3 , +.Xr ck_pr_faa 3 , +.Xr ck_pr_inc 3 , +.Xr ck_pr_dec 3 , +.Xr ck_pr_neg 3 , +.Xr ck_pr_not 3 , +.Xr ck_pr_add 3 , +.Xr ck_pr_sub 3 , +.Xr ck_pr_and 3 , +.Xr ck_pr_or 3 , +.Xr ck_pr_xor 3 , +.Xr ck_pr_cas 3 , +.Xr ck_pr_btc 3 , +.Xr ck_pr_bts 3 , +.Xr ck_pr_btr 3 +.Pp +Additional information available at http://concurrencykit.org/ diff --git a/doc/ck_pr_fence_atomic_load b/doc/ck_pr_fence_atomic_load new file mode 100644 index 0000000..cbefdaa --- /dev/null +++ b/doc/ck_pr_fence_atomic_load @@ -0,0 +1,108 @@ +.\" +.\" Copyright 2013 Samy Al Bahra. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. 
+.\" +.\" +.Dd May 16, 2013 +.Dt CK_PR_FENCE_ATOMIC_LOAD 3 +.Sh NAME +.Nm ck_pr_fence_atomic_load +.Nd enforce ordering of atomic read-modify-write operations to load operations +.Sh LIBRARY +Concurrency Kit (libck, \-lck) +.Sh SYNOPSIS +.In ck_pr.h +.Ft void +.Fn ck_pr_fence_atomic_load void +.Ft void +.Fn ck_pr_fence_strict_atomic_load void +.Sh DESCRIPTION +The +.Fn ck_pr_fence_atomic_load +function enfores the ordering of any +atomic read-modify-write operations relative to +any load operations following the function invocation. This function +always serve as an implicit compiler barrier. On +architectures implementing CK_MD_TSO, this operation +only serves as a compiler barrier and no fences +are emitted. To force the unconditional emission of +a fence, use +.Fn ck_pr_fence_strict_atomic_load . +.Sh EXAMPLE +.Bd -literal -offset indent + +#include + +static int a = 0; +static int b = 0; + +void +function(void) +{ + int c; + + ck_pr_fas_int(&a, 1); + + /* + * Guarantee that the update to a is completed + * with respect to the load of *b. + */ + ck_pr_fence_atomic_load(); + c = ck_pr_load_int(&b); + + return; +} +.Ed +.Sh RETURN VALUES +This function has no return value. +.Sh SEE ALSO +.Xr ck_pr_stall 3 , +.Xr ck_pr_fence_atomic 3 , +.Xr ck_pr_fence_atomic_store 3 , +.Xr ck_pr_fence_store 3 , +.Xr ck_pr_fence_load 3 , +.Xr ck_pr_fence_load_atomic 3 , +.Xr ck_pr_fence_load_store 3 , +.Xr ck_pr_fence_load_depends 3 , +.Xr ck_pr_fence_memory 3 , +.Xr ck_pr_barrier 3 , +.Xr ck_pr_fas 3 , +.Xr ck_pr_load 3 , +.Xr ck_pr_store 3 , +.Xr ck_pr_faa 3 , +.Xr ck_pr_inc 3 , +.Xr ck_pr_dec 3 , +.Xr ck_pr_neg 3 , +.Xr ck_pr_not 3 , +.Xr ck_pr_add 3 , +.Xr ck_pr_sub 3 , +.Xr ck_pr_and 3 , +.Xr ck_pr_or 3 , +.Xr ck_pr_xor 3 , +.Xr ck_pr_cas 3 , +.Xr ck_pr_btc 3 , +.Xr ck_pr_bts 3 , +.Xr ck_pr_btr 3 +.Pp +Additional information available at http://concurrencykit.org/ diff --git a/doc/ck_pr_fence_atomic_store b/doc/ck_pr_fence_atomic_store new file mode 100644 index 0000000..a14867c --- /dev/null +++ b/doc/ck_pr_fence_atomic_store @@ -0,0 +1,109 @@ +.\" +.\" Copyright 2013 Samy Al Bahra. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. 
+.\" +.\" +.Dd May 16, 2013 +.Dt CK_PR_FENCE_ATOMIC_STORE 3 +.Sh NAME +.Nm ck_pr_fence_atomic_store +.Nd enforce ordering of atomic read-modify-write operations to store operations +.Sh LIBRARY +Concurrency Kit (libck, \-lck) +.Sh SYNOPSIS +.In ck_pr.h +.Ft void +.Fn ck_pr_fence_atomic_store void +.Ft void +.Fn ck_pr_fence_strict_atomic_store void +.Sh DESCRIPTION +The +.Fn ck_pr_fence_atomic_store +function enfores the ordering of any +atomic read-modify-write operations relative to +any load operations following the function invocation. This function +always serve as an implicit compiler barrier. On +architectures implementing CK_MD_TSO, this operation +only serves as a compiler barrier and no fences +are emitted. To force the unconditional emission of +a fence, use +.Fn ck_pr_fence_strict_atomic_store . +.Sh EXAMPLE +.Bd -literal -offset indent + +#include + +static int a = 0; +static int b = 0; + +void +function(void) +{ + int c; + + ck_pr_fas_int(&a, 1); + + /* + * Guarantee that the update to a is completed + * with respect to the store into the value pointed + * to by b. + */ + ck_pr_fence_atomic_store(); + c = ck_pr_store_int(&b, 2); + + return; +} +.Ed +.Sh RETURN VALUES +This function has no return value. +.Sh SEE ALSO +.Xr ck_pr_stall 3 , +.Xr ck_pr_fence_atomic 3 , +.Xr ck_pr_fence_atomic_load 3 , +.Xr ck_pr_fence_store 3 , +.Xr ck_pr_fence_load 3 , +.Xr ck_pr_fence_load_atomic 3 , +.Xr ck_pr_fence_load_store 3 , +.Xr ck_pr_fence_load_depends 3 , +.Xr ck_pr_fence_memory 3 , +.Xr ck_pr_barrier 3 , +.Xr ck_pr_fas 3 , +.Xr ck_pr_load 3 , +.Xr ck_pr_store 3 , +.Xr ck_pr_faa 3 , +.Xr ck_pr_inc 3 , +.Xr ck_pr_dec 3 , +.Xr ck_pr_neg 3 , +.Xr ck_pr_not 3 , +.Xr ck_pr_add 3 , +.Xr ck_pr_sub 3 , +.Xr ck_pr_and 3 , +.Xr ck_pr_or 3 , +.Xr ck_pr_xor 3 , +.Xr ck_pr_cas 3 , +.Xr ck_pr_btc 3 , +.Xr ck_pr_bts 3 , +.Xr ck_pr_btr 3 +.Pp +Additional information available at http://concurrencykit.org/ diff --git a/doc/ck_pr_fence_load b/doc/ck_pr_fence_load index 97cb848..da8e6d4 100644 --- a/doc/ck_pr_fence_load +++ b/doc/ck_pr_fence_load @@ -83,6 +83,11 @@ function(void) This function has no return value. .Sh SEE ALSO .Xr ck_pr_stall 3 , +.Xr ck_pr_fence_atomic 3 , +.Xr ck_pr_fence_atomic_store 3 , +.Xr ck_pr_fence_atomic_load 3 , +.Xr ck_pr_fence_load_atomic 3 , +.Xr ck_pr_fence_load_store 3 , .Xr ck_pr_fence_load_depends 3 , .Xr ck_pr_fence_store 3 , .Xr ck_pr_fence_memory 3 , diff --git a/doc/ck_pr_fence_load_atomic b/doc/ck_pr_fence_load_atomic new file mode 100644 index 0000000..774a263 --- /dev/null +++ b/doc/ck_pr_fence_load_atomic @@ -0,0 +1,113 @@ +.\" +.\" Copyright 2013 Samy Al Bahra. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" +.Dd May 18, 2013 +.Dt CK_PR_FENCE_LOAD_ATOMIC 3 +.Sh NAME +.Nm ck_pr_fence_load_atomic +.Nd enforce ordering of load operations to atomic read-modify-write operations +.Sh LIBRARY +Concurrency Kit (libck, \-lck) +.Sh SYNOPSIS +.In ck_pr.h +.Ft void +.Fn ck_pr_fence_load_atomic void +.Ft void +.Fn ck_pr_fence_strict_load_atomic void +.Sh DESCRIPTION +This function enforces the ordering of any memory load and +.Fn ck_pr_load 3 operations with respect to atomic read-modify-write operations relative to the invocation of the function. Any store operations that were committed on remote processors and received by the calling processor before the invocation of +.Fn ck_pr_fence_load_atomic are also made visible only after a call to the ck_pr_fence_load family of functions. This function always serves as an implicit compiler barrier. On architectures with CK_MD_TSO or CK_MD_PSO specified (total store ordering and partial store ordering respectively), this operation only serves as a compiler barrier and no fence instructions will be emitted. To force the unconditional emission of a load fence, use +.Fn ck_pr_fence_strict_load_atomic . Architectures implementing CK_MD_RMO always emit a fence. +.Sh EXAMPLE +.Bd -literal -offset indent + +#include <ck_pr.h> + +static unsigned int a; +static unsigned int b; + +void +function(void) +{ + unsigned int snapshot_a; + + snapshot_a = ck_pr_load_uint(&a); + + /* + * Guarantee that the load from "a" completes + * before the update to "b". + */ + ck_pr_fence_load_atomic(); + ck_pr_fas_uint(&b, 1); + + return; +} +.Ed +.Sh RETURN VALUES +This function has no return value. +.Sh SEE ALSO +.Xr ck_pr_stall 3 , +.Xr ck_pr_fence_atomic 3 , +.Xr ck_pr_fence_atomic_store 3 , +.Xr ck_pr_fence_atomic_load 3 , +.Xr ck_pr_fence_load_depends 3 , +.Xr ck_pr_fence_load_store 3 , +.Xr ck_pr_fence_store 3 , +.Xr ck_pr_fence_memory 3 , +.Xr ck_pr_barrier 3 , +.Xr ck_pr_fas 3 , +.Xr ck_pr_load 3 , +.Xr ck_pr_store 3 , +.Xr ck_pr_faa 3 , +.Xr ck_pr_inc 3 , +.Xr ck_pr_dec 3 , +.Xr ck_pr_neg 3 , +.Xr ck_pr_not 3 , +.Xr ck_pr_add 3 , +.Xr ck_pr_sub 3 , +.Xr ck_pr_and 3 , +.Xr ck_pr_or 3 , +.Xr ck_pr_xor 3 , +.Xr ck_pr_cas 3 , +.Xr ck_pr_btc 3 , +.Xr ck_pr_bts 3 , +.Xr ck_pr_btr 3 +.Pp +Additional information available at http://concurrencykit.org/ diff --git a/doc/ck_pr_fence_load_depends b/doc/ck_pr_fence_load_depends index 7a98389..38718ec 100644 --- a/doc/ck_pr_fence_load_depends +++ b/doc/ck_pr_fence_load_depends @@ -45,7 +45,12 @@ which re-orders data-dependent loads (such as the defunct Alpha), this function This function has no return value.
.Sh SEE ALSO .Xr ck_pr_stall 3 , +.Xr ck_pr_fence_atomic 3 , +.Xr ck_pr_fence_atomic_store 3 , +.Xr ck_pr_fence_atomic_load 3 , .Xr ck_pr_fence_load 3 , +.Xr ck_pr_fence_load_atomic 3 , +.Xr ck_pr_fence_load_store 3 , .Xr ck_pr_fence_store 3 , .Xr ck_pr_fence_memory 3 , .Xr ck_pr_barrier 3 , diff --git a/doc/ck_pr_fence_load_store b/doc/ck_pr_fence_load_store new file mode 100644 index 0000000..378903e --- /dev/null +++ b/doc/ck_pr_fence_load_store @@ -0,0 +1,113 @@ +.\" +.\" Copyright 2013 Samy Al Bahra. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" +.Dd May 18, 2013 +.Dt CK_PR_FENCE_LOAD_STORE 3 +.Sh NAME +.Nm ck_pr_fence_load_store +.Nd enforce ordering of load operations to store operations +.Sh LIBRARY +Concurrency Kit (libck, \-lck) +.Sh SYNOPSIS +.In ck_pr.h +.Ft void +.Fn ck_pr_fence_load_store void +.Ft void +.Fn ck_pr_fence_strict_load_store void +.Sh DESCRIPTION +This function enforces the ordering of any memory load and +.Fn ck_pr_load 3 operations with respect to store operations relative to the invocation of the function. Any store operations that were committed on remote processors and received by the calling processor before the invocation of +.Fn ck_pr_fence_load_store are also made visible only after a call to the ck_pr_fence_load family of functions. This function always serves as an implicit compiler barrier. On architectures with CK_MD_TSO specified (total store ordering), this operation only serves as a compiler barrier and no fence instructions will be emitted. To force the unconditional emission of a load fence, use +.Fn ck_pr_fence_strict_load_store . Architectures implementing CK_MD_PSO and CK_MD_RMO always emit a fence. +.Sh EXAMPLE +.Bd -literal -offset indent + +#include <ck_pr.h> + +static unsigned int a; +static unsigned int b; + +void +function(void) +{ + unsigned int snapshot_a; + + snapshot_a = ck_pr_load_uint(&a); + + /* + * Guarantee that the load from "a" completes + * before the store to "b". + */ + ck_pr_fence_load_store(); + ck_pr_store_uint(&b, 1); + + return; +} +.Ed +.Sh RETURN VALUES +This function has no return value.
+.Sh SEE ALSO +.Xr ck_pr_stall 3 , +.Xr ck_pr_fence_atomic 3 , +.Xr ck_pr_fence_atomic_store 3 , +.Xr ck_pr_fence_atomic_load 3 , +.Xr ck_pr_fence_load_depends 3 , +.Xr ck_pr_fence_load_atomic 3 , +.Xr ck_pr_fence_store 3 , +.Xr ck_pr_fence_memory 3 , +.Xr ck_pr_barrier 3 , +.Xr ck_pr_fas 3 , +.Xr ck_pr_load 3 , +.Xr ck_pr_store 3 , +.Xr ck_pr_faa 3 , +.Xr ck_pr_inc 3 , +.Xr ck_pr_dec 3 , +.Xr ck_pr_neg 3 , +.Xr ck_pr_not 3 , +.Xr ck_pr_add 3 , +.Xr ck_pr_sub 3 , +.Xr ck_pr_and 3 , +.Xr ck_pr_or 3 , +.Xr ck_pr_xor 3 , +.Xr ck_pr_cas 3 , +.Xr ck_pr_btc 3 , +.Xr ck_pr_bts 3 , +.Xr ck_pr_btr 3 +.Pp +Additional information available at http://concurrencykit.org/ diff --git a/doc/ck_pr_fence_memory b/doc/ck_pr_fence_memory index 31eed57..f223527 100644 --- a/doc/ck_pr_fence_memory +++ b/doc/ck_pr_fence_memory @@ -85,6 +85,9 @@ function(void) This function has no return value. .Sh SEE ALSO .Xr ck_pr_stall 3 , +.Xr ck_pr_fence_atomic 3 , +.Xr ck_pr_fence_atomic_store 3 , +.Xr ck_pr_fence_atomic_load 3 , .Xr ck_pr_fence_load 3 , .Xr ck_pr_fence_load_depends 3 , .Xr ck_pr_fence_store 3 , diff --git a/doc/ck_pr_fence_store b/doc/ck_pr_fence_store index 0fa573d..5bb8f00 100644 --- a/doc/ck_pr_fence_store +++ b/doc/ck_pr_fence_store @@ -82,7 +82,12 @@ function(void) This function has no return value. .Sh SEE ALSO .Xr ck_pr_stall 3 , +.Xr ck_pr_fence_atomic 3 , +.Xr ck_pr_fence_atomic_store 3 , +.Xr ck_pr_fence_atomic_load 3 , .Xr ck_pr_fence_load 3 , +.Xr ck_pr_fence_load_atomic 3 , +.Xr ck_pr_fence_load_store 3 , .Xr ck_pr_fence_load_depends 3 , .Xr ck_pr_fence_memory 3 , .Xr ck_pr_barrier 3 , diff --git a/doc/ck_pr_fence_store_atomic b/doc/ck_pr_fence_store_atomic new file mode 100644 index 0000000..a559f22 --- /dev/null +++ b/doc/ck_pr_fence_store_atomic @@ -0,0 +1,108 @@ +.\" +.\" Copyright 2013 Samy Al Bahra. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. 
+.\" +.\" +.Dd May 18, 2013 +.Dt CK_PR_FENCE_STORE_ATOMIC 3 +.Sh NAME +.Nm ck_pr_fence_store_atomic +.Nd enforce ordering of store operations to load operations +.Sh LIBRARY +Concurrency Kit (libck, \-lck) +.Sh SYNOPSIS +.In ck_pr.h +.Ft void +.Fn ck_pr_fence_store_atomic void +.Ft void +.Fn ck_pr_fence_strict_store_atomic void +.Sh DESCRIPTION +The +.Fn ck_pr_fence_store_atomic +function enfores the ordering of any memory store, +.Fn ck_pr_store +and atomic read-modify-write operations to atomic read-modify-write +operations relative to the invocation of the function. This function +always serve as an implicit compiler barrier. +This functions will emit a fence for PSO and RMO +targets. In order to force the emission of a fence use the +.Fn ck_pr_fence_strict_store_atomic +function. +.Sh EXAMPLE +.Bd -literal -offset indent + +#include + +static int a = 0; +static int b = 0; + +void +function(void) +{ + + ck_pr_store_int(&a, 1); + + /* + * Guarantee that the store to a is completed + * with respect to the update of b. + */ + ck_pr_fence_store_atomic(); + ck_pr_add_int(&b, 2); + return; +} +.Ed +.Sh RETURN VALUES +This function has no return value. +.Sh SEE ALSO +.Xr ck_pr_stall 3 , +.Xr ck_pr_fence_atomic 3 , +.Xr ck_pr_fence_atomic_store 3 , +.Xr ck_pr_fence_atomic_load 3 , +.Xr ck_pr_fence_load 3 , +.Xr ck_pr_fence_load_atomic 3 , +.Xr ck_pr_fence_load_store 3 , +.Xr ck_pr_fence_load_depends 3 , +.Xr ck_pr_fence_store 3 , +.Xr ck_pr_fence_store_load 3 , +.Xr ck_pr_fence_memory 3 , +.Xr ck_pr_barrier 3 , +.Xr ck_pr_fas 3 , +.Xr ck_pr_load 3 , +.Xr ck_pr_store 3 , +.Xr ck_pr_faa 3 , +.Xr ck_pr_inc 3 , +.Xr ck_pr_dec 3 , +.Xr ck_pr_neg 3 , +.Xr ck_pr_not 3 , +.Xr ck_pr_add 3 , +.Xr ck_pr_sub 3 , +.Xr ck_pr_and 3 , +.Xr ck_pr_or 3 , +.Xr ck_pr_xor 3 , +.Xr ck_pr_cas 3 , +.Xr ck_pr_btc 3 , +.Xr ck_pr_bts 3 , +.Xr ck_pr_btr 3 +.Pp +Additional information available at http://concurrencykit.org/ diff --git a/doc/ck_pr_fence_store_load b/doc/ck_pr_fence_store_load new file mode 100644 index 0000000..40d1875 --- /dev/null +++ b/doc/ck_pr_fence_store_load @@ -0,0 +1,107 @@ +.\" +.\" Copyright 2013 Samy Al Bahra. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. 
+.\" +.\" +.Dd May 18, 2013 +.Dt CK_PR_FENCE_STORE_LOAD 3 +.Sh NAME +.Nm ck_pr_fence_store_load +.Nd enforce ordering of store operations to load operations +.Sh LIBRARY +Concurrency Kit (libck, \-lck) +.Sh SYNOPSIS +.In ck_pr.h +.Ft void +.Fn ck_pr_fence_store_load void +.Ft void +.Fn ck_pr_fence_strict_store_load void +.Sh DESCRIPTION +The +.Fn ck_pr_fence_store_load +function enfores the ordering of any memory store, +.Fn ck_pr_store +and atomic read-modify-write operations to load +operations relative to the invocation of the function. This function +always serve as an implicit compiler barrier. +A fence will currently always be emitted for this +operation, including for TSO memory model targets. +.Sh EXAMPLE +.Bd -literal -offset indent + +#include + +static int a = 0; +static int b = 0; + +void +function(void) +{ + unsigned int snapshot_b; + + ck_pr_store_int(&a, 1); + + /* + * Guarantee that the store to a is completed + * with respect to load from b. + */ + ck_pr_fence_store_load(); + snapshot_b = ck_pr_load_int(&b, 2); + return; +} +.Ed +.Sh RETURN VALUES +This function has no return value. +.Sh SEE ALSO +.Xr ck_pr_stall 3 , +.Xr ck_pr_fence_atomic 3 , +.Xr ck_pr_fence_atomic_store 3 , +.Xr ck_pr_fence_atomic_load 3 , +.Xr ck_pr_fence_load 3 , +.Xr ck_pr_fence_load_atomic 3 , +.Xr ck_pr_fence_load_store 3 , +.Xr ck_pr_fence_load_depends 3 , +.Xr ck_pr_fence_store 3 , +.Xr ck_pr_fence_store_atomic 3 , +.Xr ck_pr_fence_memory 3 , +.Xr ck_pr_barrier 3 , +.Xr ck_pr_fas 3 , +.Xr ck_pr_load 3 , +.Xr ck_pr_store 3 , +.Xr ck_pr_faa 3 , +.Xr ck_pr_inc 3 , +.Xr ck_pr_dec 3 , +.Xr ck_pr_neg 3 , +.Xr ck_pr_not 3 , +.Xr ck_pr_add 3 , +.Xr ck_pr_sub 3 , +.Xr ck_pr_and 3 , +.Xr ck_pr_or 3 , +.Xr ck_pr_xor 3 , +.Xr ck_pr_cas 3 , +.Xr ck_pr_btc 3 , +.Xr ck_pr_bts 3 , +.Xr ck_pr_btr 3 +.Pp +Additional information available at http://concurrencykit.org/ diff --git a/include/ck_brlock.h b/include/ck_brlock.h index 7b1f27d..4246c7d 100644 --- a/include/ck_brlock.h +++ b/include/ck_brlock.h @@ -83,7 +83,7 @@ ck_brlock_write_lock(struct ck_brlock *br) while (ck_pr_fas_uint(&br->writer, true) == true) ck_pr_stall(); - ck_pr_fence_memory(); + ck_pr_fence_atomic_load(); /* The reader list is protected under the writer br. */ for (cursor = br->readers; cursor != NULL; cursor = cursor->next) { @@ -121,7 +121,7 @@ ck_brlock_write_trylock(struct ck_brlock *br, unsigned int factor) * We do not require a strict fence here as atomic RMW operations * are serializing. */ - ck_pr_fence_memory(); + ck_pr_fence_atomic_load(); for (cursor = br->readers; cursor != NULL; cursor = cursor->next) { while (ck_pr_load_uint(&cursor->n_readers) != 0) { @@ -190,13 +190,19 @@ ck_brlock_read_lock(struct ck_brlock *br, struct ck_brlock_reader *reader) #if defined(__x86__) || defined(__x86_64__) ck_pr_fas_uint(&reader->n_readers, 1); - /* Serialize counter update with respect to writer snapshot. */ - ck_pr_fence_memory(); + /* + * Serialize reader counter update with respect to load of + * writer. + */ + ck_pr_fence_atomic_load(); #else ck_pr_store_uint(&reader->n_readers, 1); - /* Loads can be re-ordered before previous stores, even on TSO. */ - ck_pr_fence_strict_memory(); + /* + * Serialize reader counter update with respect to load of + * writer. 
+ */ + ck_pr_fence_store_load(); #endif if (ck_pr_load_uint(&br->writer) == false) @@ -229,10 +235,23 @@ ck_brlock_read_trylock(struct ck_brlock *br, ck_pr_stall(); } +#if defined(__x86__) || defined(__x86_64__) + ck_pr_fas_uint(&reader->n_readers, 1); + + /* + * Serialize reader counter update with respect to load of + * writer. + */ + ck_pr_fence_atomic_load(); +#else ck_pr_store_uint(&reader->n_readers, 1); - /* Loads are re-ordered with respect to prior stores. */ - ck_pr_fence_strict_memory(); + /* + * Serialize reader counter update with respect to load of + * writer. + */ + ck_pr_fence_store_load(); +#endif if (ck_pr_load_uint(&br->writer) == false) break; diff --git a/include/ck_bytelock.h b/include/ck_bytelock.h index f73adb2..9d42393 100644 --- a/include/ck_bytelock.h +++ b/include/ck_bytelock.h @@ -93,7 +93,7 @@ ck_bytelock_write_lock(struct ck_bytelock *bytelock, unsigned int slot) ck_pr_store_8(&bytelock->readers[slot - 1], false); /* Wait for slotted readers to drain out. */ - ck_pr_fence_strict_memory(); + ck_pr_fence_store_load(); for (i = 0; i < sizeof(bytelock->readers) / CK_BYTELOCK_LENGTH; i++) { while (CK_BYTELOCK_LOAD((CK_BYTELOCK_TYPE *)&readers[i]) != false) ck_pr_stall(); @@ -134,7 +134,7 @@ ck_bytelock_read_lock(struct ck_bytelock *bytelock, unsigned int slot) if (slot > sizeof bytelock->readers) { for (;;) { ck_pr_inc_uint(&bytelock->n_readers); - ck_pr_fence_memory(); + ck_pr_fence_atomic_load(); if (ck_pr_load_uint(&bytelock->owner) == 0) break; ck_pr_dec_uint(&bytelock->n_readers); @@ -150,7 +150,7 @@ ck_bytelock_read_lock(struct ck_bytelock *bytelock, unsigned int slot) slot -= 1; for (;;) { ck_pr_store_8(&bytelock->readers[slot], true); - ck_pr_fence_strict_memory(); + ck_pr_fence_store_load(); /* * If there is no owner at this point, our slot has diff --git a/include/ck_epoch.h b/include/ck_epoch.h index 4624bdf..c300a11 100644 --- a/include/ck_epoch.h +++ b/include/ck_epoch.h @@ -97,12 +97,11 @@ ck_epoch_begin(ck_epoch_t *epoch, ck_epoch_record_t *record) /* * It is possible for loads to be re-ordered before the store * is committed into the caller's epoch and active fields. - * Execute a full barrier to serialize stores with respect to - * loads + * For this reason, store to load serialization is necessary. */ ck_pr_store_uint(&record->epoch, g_epoch); ck_pr_store_uint(&record->active, 1); - ck_pr_fence_strict_memory(); + ck_pr_fence_store_load(); return; } diff --git a/include/ck_fifo.h b/include/ck_fifo.h index 5805c49..cafaa63 100644 --- a/include/ck_fifo.h +++ b/include/ck_fifo.h @@ -237,7 +237,7 @@ ck_fifo_mpmc_enqueue(struct ck_fifo_mpmc *fifo, entry->value = value; entry->next.pointer = NULL; entry->next.generation = 0; - ck_pr_fence_store(); + ck_pr_fence_store_atomic(); for (;;) { tail.generation = ck_pr_load_ptr(&fifo->tail.generation); @@ -271,9 +271,10 @@ ck_fifo_mpmc_enqueue(struct ck_fifo_mpmc *fifo, } } + ck_pr_fence_atomic(); + /* After a successful insert, forward the tail to the new entry. */ update.generation = tail.generation + 1; - ck_pr_fence_store(); ck_pr_cas_ptr_2(&fifo->tail, &tail, &update); return; } @@ -289,7 +290,7 @@ ck_fifo_mpmc_tryenqueue(struct ck_fifo_mpmc *fifo, entry->next.pointer = NULL; entry->next.generation = 0; - ck_pr_fence_store(); + ck_pr_fence_store_atomic(); tail.generation = ck_pr_load_ptr(&fifo->tail.generation); ck_pr_fence_load(); @@ -322,8 +323,9 @@ ck_fifo_mpmc_tryenqueue(struct ck_fifo_mpmc *fifo, return false; } + ck_pr_fence_atomic(); + /* After a successful insert, forward the tail to the new entry. 
*/ - ck_pr_fence_store(); update.generation = tail.generation + 1; ck_pr_cas_ptr_2(&fifo->tail, &tail, &update); return true; diff --git a/include/ck_hp_fifo.h b/include/ck_hp_fifo.h index 41064f3..3e52afb 100644 --- a/include/ck_hp_fifo.h +++ b/include/ck_hp_fifo.h @@ -76,12 +76,12 @@ ck_hp_fifo_enqueue_mpmc(ck_hp_record_t *record, entry->value = value; entry->next = NULL; - ck_pr_fence_store(); + ck_pr_fence_store_atomic(); for (;;) { tail = ck_pr_load_ptr(&fifo->tail); ck_hp_set(record, 0, tail); - ck_pr_fence_strict_memory(); + ck_pr_fence_store_load(); if (tail != ck_pr_load_ptr(&fifo->tail)) continue; @@ -93,7 +93,7 @@ ck_hp_fifo_enqueue_mpmc(ck_hp_record_t *record, break; } - ck_pr_fence_store(); + ck_pr_fence_atomic(); ck_pr_cas_ptr(&fifo->tail, tail, entry); return; } @@ -108,11 +108,11 @@ ck_hp_fifo_tryenqueue_mpmc(ck_hp_record_t *record, entry->value = value; entry->next = NULL; - ck_pr_fence_store(); + ck_pr_fence_store_atomic(); tail = ck_pr_load_ptr(&fifo->tail); ck_hp_set(record, 0, tail); - ck_pr_fence_strict_memory(); + ck_pr_fence_store_load(); if (tail != ck_pr_load_ptr(&fifo->tail)) return false; @@ -123,7 +123,7 @@ ck_hp_fifo_tryenqueue_mpmc(ck_hp_record_t *record, } else if (ck_pr_cas_ptr(&fifo->tail->next, next, entry) == false) return false; - ck_pr_fence_store(); + ck_pr_fence_atomic(); ck_pr_cas_ptr(&fifo->tail, tail, entry); return true; } @@ -140,13 +140,13 @@ ck_hp_fifo_dequeue_mpmc(ck_hp_record_t *record, ck_pr_fence_load(); tail = ck_pr_load_ptr(&fifo->tail); ck_hp_set(record, 0, head); - ck_pr_fence_strict_memory(); + ck_pr_fence_store_load(); if (head != ck_pr_load_ptr(&fifo->head)) continue; next = ck_pr_load_ptr(&head->next); ck_hp_set(record, 1, next); - ck_pr_fence_strict_memory(); + ck_pr_fence_store_load(); if (head != ck_pr_load_ptr(&fifo->head)) continue; @@ -175,13 +175,13 @@ ck_hp_fifo_trydequeue_mpmc(ck_hp_record_t *record, ck_pr_fence_load(); tail = ck_pr_load_ptr(&fifo->tail); ck_hp_set(record, 0, head); - ck_pr_fence_strict_memory(); + ck_pr_fence_store_load(); if (head != ck_pr_load_ptr(&fifo->head)) return NULL; next = ck_pr_load_ptr(&head->next); ck_hp_set(record, 1, next); - ck_pr_fence_strict_memory(); + ck_pr_fence_store_load(); if (head != ck_pr_load_ptr(&fifo->head)) return NULL; diff --git a/include/ck_hp_stack.h b/include/ck_hp_stack.h index 2a7856c..7ac8821 100644 --- a/include/ck_hp_stack.h +++ b/include/ck_hp_stack.h @@ -62,7 +62,7 @@ ck_hp_stack_pop_mpmc(ck_hp_record_t *record, struct ck_stack *target) return NULL; ck_hp_set(record, 0, entry); - ck_pr_fence_strict_memory(); + ck_pr_fence_store_load(); } while (entry != ck_pr_load_ptr(&target->head)); while (ck_pr_cas_ptr_value(&target->head, entry, entry->next, &entry) == false) { @@ -70,11 +70,11 @@ ck_hp_stack_pop_mpmc(ck_hp_record_t *record, struct ck_stack *target) return NULL; ck_hp_set(record, 0, entry); - ck_pr_fence_strict_memory(); + ck_pr_fence_store_load(); update = ck_pr_load_ptr(&target->head); while (entry != update) { ck_hp_set(record, 0, update); - ck_pr_fence_strict_memory(); + ck_pr_fence_store_load(); entry = update; update = ck_pr_load_ptr(&target->head); if (update == NULL) @@ -95,7 +95,7 @@ ck_hp_stack_trypop_mpmc(ck_hp_record_t *record, struct ck_stack *target, struct return false; ck_hp_set(record, 0, entry); - ck_pr_fence_strict_memory(); + ck_pr_fence_store_load(); if (entry != ck_pr_load_ptr(&target->head)) goto leave; diff --git a/include/ck_pr.h b/include/ck_pr.h index 35540b3..b3e8094 100644 --- a/include/ck_pr.h +++ b/include/ck_pr.h @@ -30,6 +30,7 
@@ #include #include +#include #include #include @@ -43,12 +44,90 @@ #include "gcc/ppc64/ck_pr.h" #elif defined(__ppc__) #include "gcc/ppc/ck_pr.h" -#elif defined(__GNUC__) -#include "gcc/ck_pr.h" -#else +#elif !defined(__GNUC__) #error Your platform is unsupported #endif +#if defined(__GNUC__) +#include "gcc/ck_pr.h" +#endif + +#define CK_PR_FENCE_EMIT(T) \ + CK_CC_INLINE static void \ + ck_pr_fence_##T(void) \ + { \ + ck_pr_fence_strict_##T(); \ + return; \ + } +#define CK_PR_FENCE_NOOP(T) \ + CK_CC_INLINE static void \ + ck_pr_fence_##T(void) \ + { \ + ck_pr_barrier(); \ + return; \ + } + +/* + * None of the currently supported platforms allow for data-dependent + * load ordering. + */ +CK_PR_FENCE_NOOP(load_depends) +#define ck_pr_fence_strict_load_depends ck_pr_fence_load_depends + +/* + * In memory models where atomic operations do not have serializing + * effects, atomic read-modify-write operations are modeled as stores. + */ +#if defined(CK_MD_RMO) +/* + * Only stores to the same location have a global + * ordering. + */ +CK_PR_FENCE_EMIT(atomic) +CK_PR_FENCE_EMIT(atomic_load) +CK_PR_FENCE_EMIT(atomic_store) +CK_PR_FENCE_EMIT(store_atomic) +CK_PR_FENCE_EMIT(load_atomic) +CK_PR_FENCE_EMIT(load_store) +CK_PR_FENCE_EMIT(store_load) +CK_PR_FENCE_EMIT(load) +CK_PR_FENCE_EMIT(store) +CK_PR_FENCE_EMIT(memory) +#elif defined(CK_MD_PSO) +/* + * Anything can be re-ordered with respect to stores. + * Otherwise, loads are executed in-order. + */ +CK_PR_FENCE_EMIT(atomic) +CK_PR_FENCE_NOOP(atomic_load) +CK_PR_FENCE_EMIT(atomic_store) +CK_PR_FENCE_EMIT(store_atomic) +CK_PR_FENCE_NOOP(load_atomic) +CK_PR_FENCE_EMIT(load_store) +CK_PR_FENCE_EMIT(store_load) +CK_PR_FENCE_NOOP(load) +CK_PR_FENCE_EMIT(store) +CK_PR_FENCE_EMIT(memory) +#elif defined(CK_MD_TSO) +/* + * Only loads are re-ordered and only with respect to + * prior stores. Atomic operations are serializing. + */ +CK_PR_FENCE_NOOP(atomic) +CK_PR_FENCE_NOOP(atomic_load) +CK_PR_FENCE_NOOP(atomic_store) +CK_PR_FENCE_NOOP(store_atomic) +CK_PR_FENCE_NOOP(load_atomic) +CK_PR_FENCE_NOOP(load_store) +CK_PR_FENCE_EMIT(store_load) +CK_PR_FENCE_NOOP(load) +CK_PR_FENCE_NOOP(store) +CK_PR_FENCE_NOOP(memory) +#endif /* CK_MD_TSO */ + +#undef CK_PR_FENCE_EMIT +#undef CK_PR_FENCE_NOOP + #define CK_PR_BIN(K, S, M, T, P, C) \ CK_CC_INLINE static void \ ck_pr_##K##_##S(M *target, T value) \ diff --git a/include/ck_rwlock.h b/include/ck_rwlock.h index 45593b0..81587ac 100644 --- a/include/ck_rwlock.h +++ b/include/ck_rwlock.h @@ -74,7 +74,8 @@ ck_rwlock_write_trylock(ck_rwlock_t *rw) if (ck_pr_fas_uint(&rw->writer, 1) != 0) return false; - ck_pr_fence_memory(); + ck_pr_fence_atomic_load(); + if (ck_pr_load_uint(&rw->n_readers) != 0) { ck_rwlock_write_unlock(rw); return false; @@ -90,7 +91,7 @@ ck_rwlock_write_lock(ck_rwlock_t *rw) while (ck_pr_fas_uint(&rw->writer, 1) != 0) ck_pr_stall(); - ck_pr_fence_memory(); + ck_pr_fence_atomic_load(); while (ck_pr_load_uint(&rw->n_readers) != 0) ck_pr_stall(); @@ -111,16 +112,15 @@ ck_rwlock_read_trylock(ck_rwlock_t *rw) * Serialize with respect to concurrent write * lock operation. */ - ck_pr_fence_memory(); - if (ck_pr_load_uint(&rw->writer) == 0) - goto leave; + ck_pr_fence_atomic_load(); + + if (ck_pr_load_uint(&rw->writer) == 0) { + ck_pr_fence_load(); + return true; + } + ck_pr_dec_uint(&rw->n_readers); return false; - -leave: - /* Acquire semantics are necessary. 
*/ - ck_pr_fence_load(); - return true; } CK_CC_INLINE static void @@ -137,7 +137,8 @@ ck_rwlock_read_lock(ck_rwlock_t *rw) * Serialize with respect to concurrent write * lock operation. */ - ck_pr_fence_memory(); + ck_pr_fence_atomic_load(); + if (ck_pr_load_uint(&rw->writer) == 0) break; ck_pr_dec_uint(&rw->n_readers); @@ -180,7 +181,7 @@ ck_rwlock_recursive_write_lock(ck_rwlock_recursive_t *rw, unsigned int tid) while (ck_pr_cas_uint(&rw->rw.writer, 0, tid) == false) ck_pr_stall(); - ck_pr_fence_memory(); + ck_pr_fence_atomic_load(); while (ck_pr_load_uint(&rw->rw.n_readers) != 0) ck_pr_stall(); @@ -202,7 +203,7 @@ ck_rwlock_recursive_write_trylock(ck_rwlock_recursive_t *rw, unsigned int tid) if (ck_pr_cas_uint(&rw->rw.writer, 0, tid) == false) return false; - ck_pr_fence_memory(); + ck_pr_fence_atomic_load(); if (ck_pr_load_uint(&rw->rw.n_readers) != 0) { ck_pr_store_uint(&rw->rw.writer, 0); diff --git a/include/ck_spinlock.h b/include/ck_spinlock.h index 6b08789..b78051b 100644 --- a/include/ck_spinlock.h +++ b/include/ck_spinlock.h @@ -142,7 +142,7 @@ ck_spinlock_anderson_lock(struct ck_spinlock_anderson *lock, /* Prepare slot for potential re-use by another thread. */ ck_pr_store_uint(&lock->slots[position].locked, true); - ck_pr_fence_store(); + ck_pr_fence_memory(); *slot = lock->slots + position; return; @@ -194,7 +194,7 @@ ck_spinlock_fas_trylock(struct ck_spinlock_fas *lock) if (value == false) ck_pr_fence_memory(); - return (!value); + return !value; } CK_CC_INLINE static bool @@ -268,7 +268,7 @@ ck_spinlock_cas_trylock(struct ck_spinlock_cas *lock) if (value == false) ck_pr_fence_memory(); - return (!value); + return !value; } CK_CC_INLINE static bool @@ -408,9 +408,9 @@ ck_spinlock_dec_unlock(struct ck_spinlock_dec *lock) /* * If 16-bit or 32-bit increment is supported, implement support for * trylock functionality on availability of 32-bit or 64-bit fetch-and-add - * and compare-and-swap. + * and compare-and-swap. This code path is only applied to x86*. */ -#if defined(CK_MD_TSO) +#if defined(CK_MD_TSO) && (defined(__x86__) || defined(__x86_64__)) #if defined(CK_F_PR_FAA_32) && defined(CK_F_PR_INC_16) && defined(CK_F_PR_CAS_32) #define CK_SPINLOCK_TICKET_TYPE uint32_t #define CK_SPINLOCK_TICKET_TYPE_BASE uint16_t @@ -658,9 +658,9 @@ CK_CC_INLINE static bool ck_spinlock_mcs_trylock(struct ck_spinlock_mcs **queue, struct ck_spinlock_mcs *node) { - ck_pr_store_uint(&node->locked, true); - ck_pr_store_ptr(&node->next, NULL); - ck_pr_fence_store(); + node->locked = true; + node->next = NULL; + ck_pr_fence_store_atomic(); if (ck_pr_cas_ptr(queue, NULL, node) == true) { ck_pr_fence_load(); @@ -686,24 +686,24 @@ ck_spinlock_mcs_lock(struct ck_spinlock_mcs **queue, struct ck_spinlock_mcs *nod * In the case that there is a successor, let them know they must wait * for us to unlock. */ - ck_pr_store_uint(&node->locked, true); - ck_pr_store_ptr(&node->next, NULL); + node->locked = true; + node->next = NULL; + ck_pr_fence_store_atomic(); /* * Swap current tail with current lock request. If the swap operation * returns NULL, it means the queue was empty. If the queue was empty, * then the operation is complete. */ - ck_pr_fence_memory(); previous = ck_pr_fas_ptr(queue, node); - if (previous == NULL) - return; - - /* Let the previous lock holder know that we are waiting on them. */ - ck_pr_store_ptr(&previous->next, node); - while (ck_pr_load_uint(&node->locked) == true) - ck_pr_stall(); + if (previous != NULL) { + /* Let the previous lock holder know that we are waiting on them. 
*/ + ck_pr_store_ptr(&previous->next, node); + while (ck_pr_load_uint(&node->locked) == true) + ck_pr_stall(); + } + ck_pr_fence_load(); return; } @@ -712,6 +712,8 @@ ck_spinlock_mcs_unlock(struct ck_spinlock_mcs **queue, struct ck_spinlock_mcs *n { struct ck_spinlock_mcs *next; + ck_pr_fence_memory(); + next = ck_pr_load_ptr(&node->next); if (next == NULL) { /* @@ -721,7 +723,6 @@ ck_spinlock_mcs_unlock(struct ck_spinlock_mcs **queue, struct ck_spinlock_mcs *n */ if (ck_pr_load_ptr(queue) == node && ck_pr_cas_ptr(queue, node, NULL) == true) { - ck_pr_fence_memory(); return; } @@ -740,9 +741,7 @@ ck_spinlock_mcs_unlock(struct ck_spinlock_mcs **queue, struct ck_spinlock_mcs *n } /* Allow the next lock operation to complete. */ - ck_pr_fence_memory(); ck_pr_store_uint(&next->locked, false); - return; } #endif /* CK_F_SPINLOCK_MCS */ diff --git a/include/gcc/ck_pr.h b/include/gcc/ck_pr.h index 505153d..c5231bd 100644 --- a/include/gcc/ck_pr.h +++ b/include/gcc/ck_pr.h @@ -31,9 +31,21 @@ #error Do not include this file directly, use ck_pr.h #endif +#include + +CK_CC_INLINE static void +ck_pr_barrier(void) +{ + + __asm__ __volatile__("" ::: "memory"); + return; +} + +#ifndef CK_F_PR +#define CK_F_PR + #include #include -#include /* * The following represent supported atomic operations. @@ -93,45 +105,32 @@ ck_pr_stall(void) return; } -/* - * Most target architectures do not require this. - */ -CK_CC_INLINE static void -ck_pr_fence_load_depends(void) -{ - - __sync_synchronize(); - return; -} - /* * Load and store fences are equivalent to full fences in the GCC port. */ #define CK_PR_FENCE(T) \ CK_CC_INLINE static void \ ck_pr_fence_strict_##T(void) \ - { \ - __sync_synchronize(); \ - } \ - CK_CC_INLINE static void ck_pr_fence_##T(void) \ { \ __sync_synchronize(); \ } +CK_PR_FENCE(atomic) +CK_PR_FENCE(atomic_atomic) +CK_PR_FENCE(atomic_load) +CK_PR_FENCE(atomic_store) +CK_PR_FENCE(store_atomic) +CK_PR_FENCE(load_atomic) CK_PR_FENCE(load) +CK_PR_FENCE(load_load) +CK_PR_FENCE(load_store) CK_PR_FENCE(store) +CK_PR_FENCE(store_store) +CK_PR_FENCE(store_load) CK_PR_FENCE(memory) #undef CK_PR_FENCE -CK_CC_INLINE static void -ck_pr_barrier(void) -{ - - __asm__ __volatile__("" ::: "memory"); - return; -} - /* * Atomic compare and swap. */ @@ -275,5 +274,5 @@ CK_PR_UNARY_S(8, uint8_t) #undef CK_PR_UNARY_S #undef CK_PR_UNARY - +#endif /* !CK_F_PR */ #endif /* _CK_PR_GCC_H */ diff --git a/include/gcc/ppc/ck_pr.h b/include/gcc/ppc/ck_pr.h index e1f88a6..04d330d 100644 --- a/include/gcc/ppc/ck_pr.h +++ b/include/gcc/ppc/ck_pr.h @@ -41,6 +41,11 @@ */ #include "ck_f_pr.h" +/* + * Minimum interface requirement met. + */ +#define CK_F_PR + /* * This bounces the hardware thread from low to medium * priority. 
I am unsure of the benefits of this approach @@ -55,45 +60,26 @@ ck_pr_stall(void) return; } -#if defined(CK_MD_RMO) || defined(CK_MD_PSO) -#define CK_PR_FENCE(T, I) \ - CK_CC_INLINE static void \ - ck_pr_fence_strict_##T(void) \ - { \ - __asm__ __volatile__(I ::: "memory"); \ - } \ - CK_CC_INLINE static void ck_pr_fence_##T(void) \ - { \ - __asm__ __volatile__(I ::: "memory"); \ - } -#else -#define CK_PR_FENCE(T, I) \ - CK_CC_INLINE static void \ - ck_pr_fence_strict_##T(void) \ - { \ - __asm__ __volatile__(I ::: "memory"); \ - } \ - CK_CC_INLINE static void ck_pr_fence_##T(void) \ - { \ - __asm__ __volatile__("" ::: "memory"); \ - } -#endif /* !CK_MD_RMO && !CK_MD_PSO */ - -CK_PR_FENCE(load_depends, "") +#define CK_PR_FENCE(T, I) \ + CK_CC_INLINE static void \ + ck_pr_fence_strict_##T(void) \ + { \ + __asm__ __volatile__(I ::: "memory"); \ + } + +CK_PR_FENCE(atomic, "lwsync") +CK_PR_FENCE(atomic_store, "lwsync") +CK_PR_FENCE(atomic_load, "sync") +CK_PR_FENCE(store_atomic, "lwsync") +CK_PR_FENCE(load_atomic, "lwsync") CK_PR_FENCE(store, "lwsync") +CK_PR_FENCE(store_load, "sync") CK_PR_FENCE(load, "lwsync") +CK_PR_FENCE(load_store, "lwsync") CK_PR_FENCE(memory, "sync") #undef CK_PR_FENCE -CK_CC_INLINE static void -ck_pr_barrier(void) -{ - - __asm__ __volatile__("" ::: "memory"); - return; -} - #define CK_PR_LOAD(S, M, T, C, I) \ CK_CC_INLINE static T \ ck_pr_load_##S(const M *target) \ diff --git a/include/gcc/ppc64/ck_pr.h b/include/gcc/ppc64/ck_pr.h index 62aeb7a..e00db85 100644 --- a/include/gcc/ppc64/ck_pr.h +++ b/include/gcc/ppc64/ck_pr.h @@ -40,6 +40,11 @@ */ #include "ck_f_pr.h" +/* + * Minimum interface requirement met. + */ +#define CK_F_PR + /* * This bounces the hardware thread from low to medium * priority. I am unsure of the benefits of this approach @@ -54,49 +59,30 @@ ck_pr_stall(void) return; } -#if defined(CK_MD_RMO) || defined(CK_MD_PSO) -#define CK_PR_FENCE(T, I) \ - CK_CC_INLINE static void \ - ck_pr_fence_strict_##T(void) \ - { \ - __asm__ __volatile__(I ::: "memory"); \ - } \ - CK_CC_INLINE static void ck_pr_fence_##T(void) \ - { \ - __asm__ __volatile__(I ::: "memory"); \ - } -#else -#define CK_PR_FENCE(T, I) \ - CK_CC_INLINE static void \ - ck_pr_fence_strict_##T(void) \ - { \ - __asm__ __volatile__(I ::: "memory"); \ - } \ - CK_CC_INLINE static void ck_pr_fence_##T(void) \ - { \ - __asm__ __volatile__("" ::: "memory"); \ - } -#endif /* !CK_MD_RMO && !CK_MD_PSO */ +#define CK_PR_FENCE(T, I) \ + CK_CC_INLINE static void \ + ck_pr_fence_strict_##T(void) \ + { \ + __asm__ __volatile__(I ::: "memory"); \ + } /* * These are derived from: * http://www.ibm.com/developerworks/systems/articles/powerpc.html */ -CK_PR_FENCE(load_depends, "") +CK_PR_FENCE(atomic, "lwsync") +CK_PR_FENCE(atomic_store, "lwsync") +CK_PR_FENCE(atomic_load, "sync") +CK_PR_FENCE(store_atomic, "lwsync") +CK_PR_FENCE(load_atomic, "lwsync") CK_PR_FENCE(store, "lwsync") +CK_PR_FENCE(store_load, "sync") CK_PR_FENCE(load, "lwsync") +CK_PR_FENCE(load_store, "lwsync") CK_PR_FENCE(memory, "sync") #undef CK_PR_FENCE -CK_CC_INLINE static void -ck_pr_barrier(void) -{ - - __asm__ __volatile__("" ::: "memory"); - return; -} - #define CK_PR_LOAD(S, M, T, C, I) \ CK_CC_INLINE static T \ ck_pr_load_##S(const M *target) \ diff --git a/include/gcc/sparcv9/ck_pr.h b/include/gcc/sparcv9/ck_pr.h index ba2fc41..7bd5315 100644 --- a/include/gcc/sparcv9/ck_pr.h +++ b/include/gcc/sparcv9/ck_pr.h @@ -40,6 +40,11 @@ */ #include "ck_f_pr.h" +/* + * Minimum interface requirement met. 
+ */ +#define CK_F_PR + /* * Order loads at the least. */ @@ -51,51 +56,30 @@ ck_pr_stall(void) return; } -#if defined(CK_MD_RMO) || defined(CK_MD_PSO) -/* - * If RMO is forced, then do not assume TSO model. - */ -#define CK_PR_FENCE(T, I) \ - CK_CC_INLINE static void \ - ck_pr_fence_strict_##T(void) \ - { \ - __asm__ __volatile__(I ::: "memory"); \ - } \ - CK_CC_INLINE static void ck_pr_fence_##T(void) \ - { \ - __asm__ __volatile__(I ::: "memory"); \ - } -#else +#define CK_PR_FENCE(T, I) \ + CK_CC_INLINE static void \ + ck_pr_fence_strict_##T(void) \ + { \ + __asm__ __volatile__(I ::: "memory"); \ + } + /* - * By default, we will assume TSO model is used on SPARCv9. + * Atomic operations are treated as both load and store + * operations on SPARCv9. */ -#define CK_PR_FENCE(T, I) \ - CK_CC_INLINE static void \ - ck_pr_fence_strict_##T(void) \ - { \ - __asm__ __volatile__(I ::: "memory"); \ - } \ - CK_CC_INLINE static void ck_pr_fence_##T(void) \ - { \ - __asm__ __volatile__("" ::: "memory"); \ - } -#endif /* !CK_MD_RMO && !CK_MD_PSO */ - -CK_PR_FENCE(load_depends, "") +CK_PR_FENCE(atomic, "membar #StoreStore") +CK_PR_FENCE(atomic_store, "membar #StoreStore") +CK_PR_FENCE(atomic_load, "membar #StoreLoad") +CK_PR_FENCE(store_atomic, "membar #StoreStore") +CK_PR_FENCE(load_atomic, "membar #LoadStore") CK_PR_FENCE(store, "membar #StoreStore") +CK_PR_FENCE(store_load, "membar #StoreLoad") CK_PR_FENCE(load, "membar #LoadLoad") +CK_PR_FENCE(load_store, "membar #LoadStore") CK_PR_FENCE(memory, "membar #LoadLoad | #LoadStore | #StoreStore | #StoreLoad") #undef CK_PR_FENCE -CK_CC_INLINE static void -ck_pr_barrier(void) -{ - - __asm__ __volatile__("" ::: "memory"); - return; -} - #define CK_PR_LOAD(S, M, T, C, I) \ CK_CC_INLINE static T \ ck_pr_load_##S(const M *target) \ diff --git a/include/gcc/x86/ck_pr.h b/include/gcc/x86/ck_pr.h index 38a0485..46583fe 100644 --- a/include/gcc/x86/ck_pr.h +++ b/include/gcc/x86/ck_pr.h @@ -63,52 +63,26 @@ ck_pr_stall(void) return; } -#if defined(CK_MD_RMO) || defined(CK_MD_PSO) #define CK_PR_FENCE(T, I) \ CK_CC_INLINE static void \ ck_pr_fence_strict_##T(void) \ { \ __asm__ __volatile__(I ::: "memory"); \ - } \ - CK_CC_INLINE static void ck_pr_fence_##T(void) \ - { \ - __asm__ __volatile__(I ::: "memory"); \ - } -#else -/* - * IA32 has strong memory ordering guarantees, so memory - * fences are enabled if and only if the user specifies that - * that the program will be using non-temporal instructions. - * Otherwise, an optimization barrier is used in order to prevent - * compiler re-ordering of loads and stores across the barrier. - */ -#define CK_PR_FENCE(T, I) \ - CK_CC_INLINE static void \ - ck_pr_fence_strict_##T(void) \ - { \ - __asm__ __volatile__(I ::: "memory"); \ - } \ - CK_CC_INLINE static void ck_pr_fence_##T(void) \ - { \ - __asm__ __volatile__("" ::: "memory"); \ } -#endif /* !CK_MD_RMO && !CK_MD_PSO */ +CK_PR_FENCE(atomic, "sfence") +CK_PR_FENCE(atomic_store, "sfence") +CK_PR_FENCE(atomic_load, "mfence") +CK_PR_FENCE(store_atomic, "sfence") +CK_PR_FENCE(load_atomic, "mfence") CK_PR_FENCE(load, "lfence") -CK_PR_FENCE(load_depends, "") +CK_PR_FENCE(load_store, "mfence") CK_PR_FENCE(store, "sfence") +CK_PR_FENCE(store_load, "mfence") CK_PR_FENCE(memory, "mfence") #undef CK_PR_FENCE -CK_CC_INLINE static void -ck_pr_barrier(void) -{ - - __asm__ __volatile__("" ::: "memory"); - return; -} - /* * Atomic fetch-and-store operations. 
*/ diff --git a/include/gcc/x86_64/ck_pr.h b/include/gcc/x86_64/ck_pr.h index 84e893b..524b45e 100644 --- a/include/gcc/x86_64/ck_pr.h +++ b/include/gcc/x86_64/ck_pr.h @@ -62,52 +62,25 @@ ck_pr_stall(void) return; } -#if defined(CK_MD_RMO) || defined(CK_MD_PSO) #define CK_PR_FENCE(T, I) \ CK_CC_INLINE static void \ ck_pr_fence_strict_##T(void) \ { \ __asm__ __volatile__(I ::: "memory"); \ - } \ - CK_CC_INLINE static void ck_pr_fence_##T(void) \ - { \ - __asm__ __volatile__(I ::: "memory"); \ - } -#else -/* - * IA32 has strong memory ordering guarantees, so memory - * fences are enabled if and only if the user specifies that - * that the program will be using non-temporal instructions. - * Otherwise, an optimization barrier is used in order to prevent - * compiler re-ordering of loads and stores across the barrier. - */ -#define CK_PR_FENCE(T, I) \ - CK_CC_INLINE static void \ - ck_pr_fence_strict_##T(void) \ - { \ - __asm__ __volatile__(I ::: "memory"); \ - } \ - CK_CC_INLINE static void ck_pr_fence_##T(void) \ - { \ - __asm__ __volatile__("" ::: "memory"); \ } -#endif /* !CK_MD_RMO && !CK_MD_PSO */ +CK_PR_FENCE(atomic_store, "sfence") +CK_PR_FENCE(atomic_load, "mfence") +CK_PR_FENCE(store_atomic, "sfence") +CK_PR_FENCE(load_atomic, "mfence") CK_PR_FENCE(load, "lfence") -CK_PR_FENCE(load_depends, "") +CK_PR_FENCE(load_store, "mfence") CK_PR_FENCE(store, "sfence") +CK_PR_FENCE(store_load, "mfence") CK_PR_FENCE(memory, "mfence") #undef CK_PR_FENCE -CK_CC_INLINE static void -ck_pr_barrier(void) -{ - - __asm__ __volatile__("" ::: "memory"); - return; -} - /* * Atomic fetch-and-store operations. */ diff --git a/regressions/ck_pr/benchmark/Makefile b/regressions/ck_pr/benchmark/Makefile index f43e792..6b6116e 100644 --- a/regressions/ck_pr/benchmark/Makefile +++ b/regressions/ck_pr/benchmark/Makefile @@ -3,16 +3,16 @@ all: ck_pr_cas_64 ck_pr_fas_64 ck_pr_cas_64_2 ck_pr_cas_64_2: ck_pr_cas_64_2.c - $(CC) $(CFLAGS) -o ck_pr_cas_64_2 ck_pr_cas_64_2.c + $(CC) $(CFLAGS) -o ck_pr_cas_64_2 ck_pr_cas_64_2.c -lm ck_pr_cas_64: ck_pr_cas_64.c - $(CC) $(CFLAGS) -o ck_pr_cas_64 ck_pr_cas_64.c + $(CC) $(CFLAGS) -o ck_pr_cas_64 ck_pr_cas_64.c -lm ck_pr_fas_64: ck_pr_fas_64.c - $(CC) $(CFLAGS) -o ck_pr_fas_64 ck_pr_fas_64.c + $(CC) $(CFLAGS) -o ck_pr_fas_64 ck_pr_fas_64.c -lm clean: rm -rf ck_pr_cas_64 ck_pr_fas_64 ck_pr_cas_64_2 *.dSYM *.exe include ../../../build/regressions.build -CFLAGS+=$(PTHREAD_CFLAGS) -D_GNU_SOURCE -lm +CFLAGS+=$(PTHREAD_CFLAGS) -D_GNU_SOURCE diff --git a/regressions/ck_spinlock/benchmark/Makefile b/regressions/ck_spinlock/benchmark/Makefile index 14bd901..1afeb37 100644 --- a/regressions/ck_spinlock/benchmark/Makefile +++ b/regressions/ck_spinlock/benchmark/Makefile @@ -14,67 +14,67 @@ OBJECTS=ck_ticket.THROUGHPUT ck_ticket.LATENCY \ all: $(OBJECTS) ck_spinlock.THROUGHPUT: ck_spinlock.c - $(CC) -DTHROUGHPUT $(CFLAGS) -o ck_spinlock.THROUGHPUT ck_spinlock.c + $(CC) -DTHROUGHPUT $(CFLAGS) -o ck_spinlock.THROUGHPUT ck_spinlock.c -lm ck_spinlock.LATENCY: ck_spinlock.c - $(CC) -DLATENCY $(CFLAGS) -o ck_spinlock.LATENCY ck_spinlock.c + $(CC) -DLATENCY $(CFLAGS) -o ck_spinlock.LATENCY ck_spinlock.c -lm ck_ticket.THROUGHPUT: ck_ticket.c - $(CC) -DTHROUGHPUT $(CFLAGS) -o ck_ticket.THROUGHPUT ck_ticket.c + $(CC) -DTHROUGHPUT $(CFLAGS) -o ck_ticket.THROUGHPUT ck_ticket.c -lm ck_ticket.LATENCY: ck_ticket.c - $(CC) -DLATENCY $(CFLAGS) -o ck_ticket.LATENCY ck_ticket.c + $(CC) -DLATENCY $(CFLAGS) -o ck_ticket.LATENCY ck_ticket.c -lm ck_mcs.THROUGHPUT: ck_mcs.c - $(CC) -DTHROUGHPUT $(CFLAGS) -o 
ck_mcs.THROUGHPUT ck_mcs.c + $(CC) -DTHROUGHPUT $(CFLAGS) -o ck_mcs.THROUGHPUT ck_mcs.c -lm ck_mcs.LATENCY: ck_mcs.c - $(CC) -DLATENCY $(CFLAGS) -o ck_mcs.LATENCY ck_mcs.c + $(CC) -DLATENCY $(CFLAGS) -o ck_mcs.LATENCY ck_mcs.c -lm ck_dec.THROUGHPUT: ck_dec.c - $(CC) -DTHROUGHPUT $(CFLAGS) -o ck_dec.THROUGHPUT ck_dec.c + $(CC) -DTHROUGHPUT $(CFLAGS) -o ck_dec.THROUGHPUT ck_dec.c -lm ck_dec.LATENCY: ck_dec.c - $(CC) -DLATENCY $(CFLAGS) -o ck_dec.LATENCY ck_dec.c + $(CC) -DLATENCY $(CFLAGS) -o ck_dec.LATENCY ck_dec.c -lm ck_cas.THROUGHPUT: ck_cas.c - $(CC) -DTHROUGHPUT $(CFLAGS) -o ck_cas.THROUGHPUT ck_cas.c + $(CC) -DTHROUGHPUT $(CFLAGS) -o ck_cas.THROUGHPUT ck_cas.c -lm ck_cas.LATENCY: ck_cas.c - $(CC) -DLATENCY $(CFLAGS) -o ck_cas.LATENCY ck_cas.c + $(CC) -DLATENCY $(CFLAGS) -o ck_cas.LATENCY ck_cas.c -lm ck_fas.THROUGHPUT: ck_fas.c - $(CC) -DTHROUGHPUT $(CFLAGS) -o ck_fas.THROUGHPUT ck_fas.c + $(CC) -DTHROUGHPUT $(CFLAGS) -o ck_fas.THROUGHPUT ck_fas.c -lm ck_fas.LATENCY: ck_fas.c - $(CC) -DLATENCY $(CFLAGS) -o ck_fas.LATENCY ck_fas.c + $(CC) -DLATENCY $(CFLAGS) -o ck_fas.LATENCY ck_fas.c -lm ck_clh.THROUGHPUT: ck_clh.c - $(CC) -DTHROUGHPUT $(CFLAGS) -o ck_clh.THROUGHPUT ck_clh.c + $(CC) -DTHROUGHPUT $(CFLAGS) -o ck_clh.THROUGHPUT ck_clh.c -lm ck_clh.LATENCY: ck_clh.c - $(CC) -DLATENCY $(CFLAGS) -o ck_clh.LATENCY ck_clh.c + $(CC) -DLATENCY $(CFLAGS) -o ck_clh.LATENCY ck_clh.c -lm linux_spinlock.THROUGHPUT: linux_spinlock.c - $(CC) -DTHROUGHPUT $(CFLAGS) -o linux_spinlock.THROUGHPUT linux_spinlock.c + $(CC) -DTHROUGHPUT $(CFLAGS) -o linux_spinlock.THROUGHPUT linux_spinlock.c -lm linux_spinlock.LATENCY: linux_spinlock.c - $(CC) -DLATENCY $(CFLAGS) -o linux_spinlock.LATENCY linux_spinlock.c + $(CC) -DLATENCY $(CFLAGS) -o linux_spinlock.LATENCY linux_spinlock.c -lm ck_ticket_pb.THROUGHPUT: ck_ticket_pb.c - $(CC) -DTHROUGHPUT $(CFLAGS) -o ck_ticket_pb.THROUGHPUT ck_ticket_pb.c + $(CC) -DTHROUGHPUT $(CFLAGS) -o ck_ticket_pb.THROUGHPUT ck_ticket_pb.c -lm ck_ticket_pb.LATENCY: ck_ticket_pb.c - $(CC) -DLATENCY $(CFLAGS) -o ck_ticket_pb.LATENCY ck_ticket_pb.c + $(CC) -DLATENCY $(CFLAGS) -o ck_ticket_pb.LATENCY ck_ticket_pb.c -lm ck_anderson.THROUGHPUT: ck_anderson.c - $(CC) -DTHROUGHPUT $(CFLAGS) -o ck_anderson.THROUGHPUT ck_anderson.c + $(CC) -DTHROUGHPUT $(CFLAGS) -o ck_anderson.THROUGHPUT ck_anderson.c -lm ck_anderson.LATENCY: ck_anderson.c - $(CC) -DLATENCY $(CFLAGS) -o ck_anderson.LATENCY ck_anderson.c + $(CC) -DLATENCY $(CFLAGS) -o ck_anderson.LATENCY ck_anderson.c -lm clean: rm -rf *.dSYM *.exe $(OBJECTS) include ../../../build/regressions.build -CFLAGS+=$(PTHREAD_CFLAGS) -D_GNU_SOURCE -lm +CFLAGS+=$(PTHREAD_CFLAGS) -D_GNU_SOURCE diff --git a/src/ck_epoch.c b/src/ck_epoch.c index 1904748..51016e5 100644 --- a/src/ck_epoch.c +++ b/src/ck_epoch.c @@ -162,6 +162,7 @@ ck_epoch_recycle(struct ck_epoch *global) record = ck_epoch_record_container(cursor); if (ck_pr_load_uint(&record->state) == CK_EPOCH_STATE_FREE) { + /* Serialize with respect to deferral list clean-up. */ ck_pr_fence_load(); state = ck_pr_fas_uint(&record->state, CK_EPOCH_STATE_USED); if (state == CK_EPOCH_STATE_FREE) { diff --git a/src/ck_hs.c b/src/ck_hs.c index 6c082e9..cf7d7c0 100644 --- a/src/ck_hs.c +++ b/src/ck_hs.c @@ -421,7 +421,7 @@ restart: */ if (slot != NULL && *slot != CK_HS_EMPTY) { ck_pr_inc_uint(&map->generation[h & CK_HS_G_MASK]); - ck_pr_fence_store(); + ck_pr_fence_atomic_store(); ck_pr_store_ptr(slot, CK_HS_TOMBSTONE); } } else {
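For context, the following is a minimal sketch, not part of the patch, of how the finer-grained fences introduced above are intended to be paired; it mirrors the ck_rwlock/ck_brlock changes in this diff. The writer_enter/writer_exit names and the flag/n_readers variables are hypothetical and exist only for illustration.

#include <ck_pr.h>

static unsigned int flag;      /* writer flag, acquired with an atomic RMW */
static unsigned int n_readers; /* reader count, inspected by the writer */

static void
writer_enter(void)
{

	/* Acquire the flag with an atomic read-modify-write operation. */
	while (ck_pr_fas_uint(&flag, 1) == 1)
		ck_pr_stall();

	/*
	 * Order the atomic update of flag before the subsequent loads of
	 * n_readers; on CK_MD_TSO this is only a compiler barrier.
	 */
	ck_pr_fence_atomic_load();
	while (ck_pr_load_uint(&n_readers) != 0)
		ck_pr_stall();

	return;
}

static void
writer_exit(void)
{

	/* Order critical-section memory operations before the releasing store. */
	ck_pr_fence_memory();
	ck_pr_store_uint(&flag, 0);
	return;
}

The pattern replaces a blanket ck_pr_fence_memory (or ck_pr_fence_strict_memory) on the acquire path with ck_pr_fence_atomic_load, so TSO targets pay no fence cost while PSO and RMO targets still emit the required instruction.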