added neon support for popcount (#861)

added neon support for popcount
This commit is contained in:
Alden Torres 2016-06-27 18:51:43 -04:00 committed by Arvid Norberg
parent 9233edae4e
commit f7515edc8b
6 changed files with 59 additions and 2 deletions

View File

@ -40,6 +40,7 @@ namespace libtorrent { namespace aux
// Runtime CPU feature flags, one per instruction-set extension.
// initialized by static initializers (in cpuid.cpp)
// NOTE(review): because they are set during static initialization, code
// running from another translation unit's static initializer may read
// them before they have been set -- confirm no such reader exists.
TORRENT_EXTRA_EXPORT extern bool sse42_support;
TORRENT_EXTRA_EXPORT extern bool mmx_support;
TORRENT_EXTRA_EXPORT extern bool arm_neon_support;
} }
#endif // TORRENT_CPUID_HPP_INCLUDED

View File

@ -538,5 +538,10 @@ POSSIBILITY OF SUCH DAMAGE.
# define TORRENT_HAS_BUILTIN_CLZ 0
#endif // TORRENT_HAS_BUILTIN_CLZ
#endif // TORRENT_CONFIG_HPP_INCLUDED
// TORRENT_HAS_ARM_NEON is 1 only when building for ARM and the compiler
// advertises NEON through __ARM_NEON; it is 0 in every other case.
#if !TORRENT_HAS_ARM || !defined __ARM_NEON
# define TORRENT_HAS_ARM_NEON 0
#else
# define TORRENT_HAS_ARM_NEON 1
#endif // TORRENT_HAS_ARM_NEON
#endif // TORRENT_CONFIG_HPP_INCLUDED

View File

@ -37,6 +37,10 @@ POSSIBILITY OF SUCH DAMAGE.
#include <intrin.h>
#endif
#if TORRENT_HAS_ARM_NEON
#include <arm_neon.h>
#endif
namespace libtorrent
{
bool bitfield::all_set() const
@ -79,6 +83,25 @@ namespace libtorrent
}
#endif // TORRENT_HAS_SSE
#if TORRENT_HAS_ARM_NEON
	// NEON fast path, taken only when the runtime flag says NEON is available.
	if (aux::arm_neon_support)
	{
		for (int i = 0; i < words; ++i)
		{
			// Move exactly one 32-bit word into a 64-bit NEON register; the
			// upper half is zero and adds nothing to the count. Loading 8 bytes
			// straight from &m_buf[i] would also pull in the neighbouring word
			// and read past the end of the buffer on the last iteration.
			uint8x8_t const in_val = vcreate_u8(static_cast<std::uint64_t>(m_buf[i]));
			// per-byte population count
			uint8x8_t const cnt8x8_val = vcnt_u8(in_val);
			// pairwise widening adds: 8 x u8 -> 4 x u16 -> 2 x u32 -> 1 x u64
			uint16x4_t const cnt16x4_val = vpaddl_u8(cnt8x8_val);
			uint32x2_t const cnt32x2_val = vpaddl_u16(cnt16x4_val);
			uint64x1_t const cnt64x1_val = vpaddl_u32(cnt32x2_val);
			// Read the single lane back instead of storing the vector: storing
			// a uint32x2_t writes 8 bytes, which overruns a 4-byte local.
			ret += static_cast<std::uint32_t>(vget_lane_u64(cnt64x1_val, 0));
		}

		return ret;
	}
#endif // TORRENT_HAS_ARM_NEON
for (int i = 0; i < words; ++i)
{
std::uint32_t v = m_buf[i];

View File

@ -44,6 +44,10 @@ POSSIBILITY OF SUCH DAMAGE.
#include <cstring> // for std::memset
#endif
#if TORRENT_HAS_ARM
#include <sys/auxv.h>
#endif
namespace libtorrent { namespace aux
{
namespace {
@ -87,8 +91,24 @@ namespace libtorrent { namespace aux
#endif
}
// Reports whether the NEON code paths may be used on the running machine.
//
// Returns false whenever the build did not enable TORRENT_HAS_ARM_NEON.
// Otherwise the answer is read from the hardware-capability word that
// getauxval() exposes. The AT_HWCAP / HWCAP_* values are spelled out
// numerically rather than taken from the system headers.
bool supports_arm_neon()
{
#if TORRENT_HAS_ARM_NEON
	// numeric value of AT_HWCAP
	unsigned long const at_hwcap = 16;
#if defined __arm__
	// HWCAP_NEON
	unsigned long const neon_bit = 1ul << 12;
#elif defined __aarch64__
	// HWCAP_ASIMD
	unsigned long const neon_bit = 1ul << 1;
#else
	// Neither 32-bit nor 64-bit ARM: there is no capability bit to test, so
	// report "unsupported" instead of falling off the end of the function.
	unsigned long const neon_bit = 0;
#endif
	return (getauxval(at_hwcap) & neon_bit) != 0;
#else
	return false;
#endif // TORRENT_HAS_ARM_NEON
}
} // anonymous namespace
// Definitions of the CPU feature flags. Each one is filled in during
// static initialization by calling the matching supports_*() probe once.
bool sse42_support = supports_sse42();
bool mmx_support = supports_mmx();
bool arm_neon_support = supports_arm_neon();
} }

View File

@ -125,7 +125,6 @@ test-suite libtorrent :
test_socket_io.cpp
# test_random.cpp
test_gzip.cpp
test_bitfield.cpp
test_part_file.cpp
test_peer_list.cpp
test_torrent_info.cpp
@ -160,6 +159,7 @@ test-suite libtorrent :
]
[ run test_sha1_hash.cpp ]
[ run test_bitfield.cpp ]
[ run test_receive_buffer.cpp ]
[ run test_alert_manager.cpp ]
[ run test_direct_dht.cpp ]
@ -271,6 +271,7 @@ explicit win-tests ;
alias arm-tests :
test_sha1_hash
test_bitfield
;
explicit arm-tests ;

View File

@ -32,6 +32,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "test.hpp"
#include "libtorrent/bitfield.hpp"
#include "libtorrent/aux_/cpuid.hpp"
#include <stdlib.h>
using namespace libtorrent;
@ -187,4 +188,10 @@ TORRENT_TEST(bitfield)
TEST_EQUAL(test1.count(), 32 + 8 + 2);
TEST_EQUAL(test1.all_set(), true);
}
// Check the runtime NEON flag against the build target: it is expected to
// be set on ARM builds and clear on every other platform.
// NOTE(review): supports_arm_neon() returns false unless TORRENT_HAS_ARM_NEON
// is enabled, so an ARM build without NEON would trip the first assert --
// confirm whether this guard should be TORRENT_HAS_ARM_NEON instead.
#if TORRENT_HAS_ARM
TORRENT_ASSERT(aux::arm_neon_support);
#else
TORRENT_ASSERT(!aux::arm_neon_support);
#endif
}