From f7515edc8b56606a3c49e6fde83f9d5e513155e8 Mon Sep 17 00:00:00 2001
From: Alden Torres
Date: Mon, 27 Jun 2016 18:51:43 -0400
Subject: [PATCH] added neon support for popcount (#861)

added neon support for popcount
---
Review notes (this section is ignored by git-am):
 - bitfield.cpp: the NEON loop loads exactly one 32 bit word per iteration
   (vld1_dup_u32) and reads the result with vget_lane_u32(). The 8 byte
   vld1_u8()/vst1_u32() pair would read past the last word of m_buf and
   write 8 bytes into a 4 byte local.
 - cpuid.cpp: supports_arm_neon() returns false on ARM targets that define
   neither __arm__ nor __aarch64__ instead of falling off the end.
 - test_bitfield.cpp: the check is keyed on TORRENT_HAS_ARM_NEON and uses
   TEST_EQUAL, so it also runs in builds where asserts are disabled.
 - NOTE(review): the <...> include targets were missing from the copy of
   the patch under review and are restored as <nmmintrin.h>, <arm_neon.h>,
   <cstring>, <sys/auxv.h> and <cstdlib>; confirm against the tree.
 - NOTE(review): post-image blob ids on the "index" lines have not been
   recomputed for the modified files.

 include/libtorrent/aux_/cpuid.hpp |  1 +
 include/libtorrent/config.hpp     |  7 ++++++-
 src/bitfield.cpp                  | 27 +++++++++++++++++++++++++++
 src/cpuid.cpp                     | 25 +++++++++++++++++++++++++
 test/Jamfile                      |  3 ++-
 test/test_bitfield.cpp            |  8 ++++++++
 6 files changed, 69 insertions(+), 2 deletions(-)

diff --git a/include/libtorrent/aux_/cpuid.hpp b/include/libtorrent/aux_/cpuid.hpp
index 0165e5172..16e000a2a 100644
--- a/include/libtorrent/aux_/cpuid.hpp
+++ b/include/libtorrent/aux_/cpuid.hpp
@@ -40,6 +40,7 @@ namespace libtorrent { namespace aux
 	// initialized by static initializers (in cpuid.cpp)
 	TORRENT_EXTRA_EXPORT extern bool sse42_support;
 	TORRENT_EXTRA_EXPORT extern bool mmx_support;
+	TORRENT_EXTRA_EXPORT extern bool arm_neon_support;
 } }
 
 #endif // TORRENT_CPUID_HPP_INCLUDED
diff --git a/include/libtorrent/config.hpp b/include/libtorrent/config.hpp
index ae17c4f20..421884645 100644
--- a/include/libtorrent/config.hpp
+++ b/include/libtorrent/config.hpp
@@ -538,5 +538,10 @@ POSSIBILITY OF SUCH DAMAGE.
 # define TORRENT_HAS_BUILTIN_CLZ 0
 #endif // TORRENT_HAS_BUILTIN_CLZ
 
-#endif // TORRENT_CONFIG_HPP_INCLUDED
+#if TORRENT_HAS_ARM && (defined __ARM_NEON || defined __ARM_NEON__)
+# define TORRENT_HAS_ARM_NEON 1
+#else
+# define TORRENT_HAS_ARM_NEON 0
+#endif // TORRENT_HAS_ARM_NEON
 
+#endif // TORRENT_CONFIG_HPP_INCLUDED
diff --git a/src/bitfield.cpp b/src/bitfield.cpp
index 7a3b963dd..a3fe4940d 100644
--- a/src/bitfield.cpp
+++ b/src/bitfield.cpp
@@ -37,6 +37,10 @@ POSSIBILITY OF SUCH DAMAGE.
 #include <nmmintrin.h>
 #endif
 
+#if TORRENT_HAS_ARM_NEON
+#include <arm_neon.h>
+#endif
+
 namespace libtorrent
 {
 	bool bitfield::all_set() const
@@ -79,6 +83,29 @@ namespace libtorrent
 		}
 #endif // TORRENT_HAS_SSE
 
+#if TORRENT_HAS_ARM_NEON
+		if (aux::arm_neon_support)
+		{
+			for (int i = 0; i < words; ++i)
+			{
+				// load exactly one 32 bit word (duplicated into both
+				// lanes). An 8 byte vld1_u8() here would read past the
+				// end of m_buf on the last word
+				uint32x2_t const in_val = vld1_dup_u32(&m_buf[i]);
+				uint8x8_t const cnt8x8_val = vcnt_u8(vreinterpret_u8_u32(in_val));
+				uint16x4_t const cnt16x4_val = vpaddl_u8(cnt8x8_val);
+				uint32x2_t const cnt32x2_val = vpaddl_u16(cnt16x4_val);
+
+				// both lanes hold the bit count of the word. Extract a
+				// single lane; vst1_u32() stores both lanes (8 bytes) and
+				// would overflow a single uint32_t
+				ret += vget_lane_u32(cnt32x2_val, 0);
+			}
+
+			return ret;
+		}
+#endif // TORRENT_HAS_ARM_NEON
+
 		for (int i = 0; i < words; ++i)
 		{
 			std::uint32_t v = m_buf[i];
diff --git a/src/cpuid.cpp b/src/cpuid.cpp
index 9d935fc47..c2d6e939e 100644
--- a/src/cpuid.cpp
+++ b/src/cpuid.cpp
@@ -44,6 +44,10 @@ POSSIBILITY OF SUCH DAMAGE.
 #include <cstring> // for std::memset
 #endif
 
+#if TORRENT_HAS_ARM
+#include <sys/auxv.h>
+#endif
+
 namespace libtorrent { namespace aux
 {
 	namespace {
@@ -87,8 +91,29 @@ namespace libtorrent { namespace aux
 #endif
 	}
 
+	// returns true if the auxiliary vector of the running process reports
+	// NEON support. Always false when built without TORRENT_HAS_ARM_NEON
+	bool supports_arm_neon()
+	{
+#if TORRENT_HAS_ARM_NEON
+#if defined __arm__
+		// spelled-out form of: getauxval(AT_HWCAP) & HWCAP_NEON
+		return (getauxval(16) & (1 << 12)) != 0;
+#elif defined __aarch64__
+		// spelled-out form of: getauxval(AT_HWCAP) & HWCAP_ASIMD
+		return (getauxval(16) & (1 << 1)) != 0;
+#else
+		// neither 32 nor 64 bit ARM was detected. Report no support
+		// rather than falling off the end of a non-void function
+		return false;
+#endif // __arm__ / __aarch64__
+#else
+		return false;
+#endif // TORRENT_HAS_ARM_NEON
+	}
+
 	} // anonymous namespace
 
 	bool sse42_support = supports_sse42();
 	bool mmx_support = supports_mmx();
+	bool arm_neon_support = supports_arm_neon();
 } }
diff --git a/test/Jamfile b/test/Jamfile
index 553e3ad85..cf22a0376 100644
--- a/test/Jamfile
+++ b/test/Jamfile
@@ -125,7 +125,6 @@ test-suite libtorrent :
 	test_socket_io.cpp
 #	test_random.cpp
 	test_gzip.cpp
-	test_bitfield.cpp
 	test_part_file.cpp
 	test_peer_list.cpp
 	test_torrent_info.cpp
@@ -160,6 +159,7 @@ test-suite libtorrent :
 	]
 
 	[ run test_sha1_hash.cpp ]
+	[ run test_bitfield.cpp ]
 	[ run test_receive_buffer.cpp ]
 	[ run test_alert_manager.cpp ]
 	[ run test_direct_dht.cpp ]
@@ -271,6 +271,7 @@ explicit win-tests ;
 
 alias arm-tests :
 	test_sha1_hash
+	test_bitfield
 	;
 
 explicit arm-tests ;
diff --git a/test/test_bitfield.cpp b/test/test_bitfield.cpp
index d6f04ca2b..b970f2c22 100644
--- a/test/test_bitfield.cpp
+++ b/test/test_bitfield.cpp
@@ -32,6 +32,7 @@ POSSIBILITY OF SUCH DAMAGE.
 
 #include "test.hpp"
 #include "libtorrent/bitfield.hpp"
+#include "libtorrent/aux_/cpuid.hpp"
 #include <cstdlib>
 
 using namespace libtorrent;
@@ -187,4 +188,11 @@ TORRENT_TEST(bitfield)
 		TEST_EQUAL(test1.count(), 32 + 8 + 2);
 		TEST_EQUAL(test1.all_set(), true);
 	}
+
+	// runtime NEON detection is only compiled in when TORRENT_HAS_ARM_NEON is set
+#if TORRENT_HAS_ARM_NEON
+	TEST_EQUAL(aux::arm_neon_support, true);
+#else
+	TEST_EQUAL(aux::arm_neon_support, false);
+#endif
 }